From dd667b381138272ce1462e4ceade696795c020c6 Mon Sep 17 00:00:00 2001 From: Janusz Nykiel Date: Tue, 2 May 2017 21:38:03 +0100 Subject: [PATCH 001/780] Clear sandbox console by replacing the whole div Even with the original fix for #66, the sandbox didn't work for me on macOS 10.12.4. On a hunch (the problem appeared only after clearing the non-empty output once, so...), I've tried different methods of clearing the output, found on StackOverflow (hey, I'm learning! Great book BTW) instead of trying to nudge the output to reappear as in the original fix. This seems to work for me, at least with the latest browsers on macOS and Windows 10. --- html/js/sandbox.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/html/js/sandbox.js b/html/js/sandbox.js index b559df3aa..6fb398131 100644 --- a/html/js/sandbox.js +++ b/html/js/sandbox.js @@ -290,10 +290,12 @@ this.div = div; }; - var safari = /Safari\//.test(navigator.userAgent); - Output.prototype = { - clear: function() { this.div.innerHTML = ""; }, + clear: function() { + var clone = this.div.cloneNode(false); + this.div.parentNode.replaceChild(clone, this.div); + this.div = clone; + }, out: function(type, args) { var wrap = document.createElement("pre"); wrap.className = "sandbox-output-" + type; @@ -306,8 +308,6 @@ wrap.appendChild(represent(arg, 58)); } this.div.appendChild(wrap); - if (safari) - setTimeout(function() { this.div.style.minHeight = ".1em"; }.bind(this), 50); } }; From 2df7d690e1b91bf3429df58c3f26b05c675c3361 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Thu, 11 May 2017 09:11:11 +0200 Subject: [PATCH 002/780] Restructure ancestry.js to avoid virus scanner alerts For some reason they don't like concatenated JSON --- code/ancestry.js | 82 ++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/code/ancestry.js b/code/ancestry.js index 62a0fa7f1..3f5930a19 100644 --- a/code/ancestry.js +++ b/code/ancestry.js @@ 
-1,44 +1,44 @@ -var ANCESTRY_FILE = "[\n " + [ - '{"name": "Carolus Haverbeke", "sex": "m", "born": 1832, "died": 1905, "father": "Carel Haverbeke", "mother": "Maria van Brussel"}', - '{"name": "Emma de Milliano", "sex": "f", "born": 1876, "died": 1956, "father": "Petrus de Milliano", "mother": "Sophia van Damme"}', - '{"name": "Maria de Rycke", "sex": "f", "born": 1683, "died": 1724, "father": "Frederik de Rycke", "mother": "Laurentia van Vlaenderen"}', - '{"name": "Jan van Brussel", "sex": "m", "born": 1714, "died": 1748, "father": "Jacobus van Brussel", "mother": "Joanna van Rooten"}', - '{"name": "Philibert Haverbeke", "sex": "m", "born": 1907, "died": 1997, "father": "Emile Haverbeke", "mother": "Emma de Milliano"}', - '{"name": "Jan Frans van Brussel", "sex": "m", "born": 1761, "died": 1833, "father": "Jacobus Bernardus van Brussel", "mother":null}', - '{"name": "Pauwels van Haverbeke", "sex": "m", "born": 1535, "died": 1582, "father": "N. van Haverbeke", "mother":null}', - '{"name": "Clara Aernoudts", "sex": "f", "born": 1918, "died": 2012, "father": "Henry Aernoudts", "mother": "Sidonie Coene"}', - '{"name": "Emile Haverbeke", "sex": "m", "born": 1877, "died": 1968, "father": "Carolus Haverbeke", "mother": "Maria Sturm"}', - '{"name": "Lieven de Causmaecker", "sex": "m", "born": 1696, "died": 1724, "father": "Carel de Causmaecker", "mother": "Joanna Claes"}', - '{"name": "Pieter Haverbeke", "sex": "m", "born": 1602, "died": 1642, "father": "Lieven van Haverbeke", "mother":null}', - '{"name": "Livina Haverbeke", "sex": "f", "born": 1692, "died": 1743, "father": "Daniel Haverbeke", "mother": "Joanna de Pape"}', - '{"name": "Pieter Bernard Haverbeke", "sex": "m", "born": 1695, "died": 1762, "father": "Willem Haverbeke", "mother": "Petronella Wauters"}', - '{"name": "Lieven van Haverbeke", "sex": "m", "born": 1570, "died": 1636, "father": "Pauwels van Haverbeke", "mother": "Lievijne Jans"}', - '{"name": "Joanna de Causmaecker", "sex": "f", "born": 1762, "died": 
1807, "father": "Bernardus de Causmaecker", "mother":null}', - '{"name": "Willem Haverbeke", "sex": "m", "born": 1668, "died": 1731, "father": "Lieven Haverbeke", "mother": "Elisabeth Hercke"}', - '{"name": "Pieter Antone Haverbeke", "sex": "m", "born": 1753, "died": 1798, "father": "Jan Francies Haverbeke", "mother": "Petronella de Decker"}', - '{"name": "Maria van Brussel", "sex": "f", "born": 1801, "died": 1834, "father": "Jan Frans van Brussel", "mother": "Joanna de Causmaecker"}', - '{"name": "Angela Haverbeke", "sex": "f", "born": 1728, "died": 1734, "father": "Pieter Bernard Haverbeke", "mother": "Livina de Vrieze"}', - '{"name": "Elisabeth Haverbeke", "sex": "f", "born": 1711, "died": 1754, "father": "Jan Haverbeke", "mother": "Maria de Rycke"}', - '{"name": "Lievijne Jans", "sex": "f", "born": 1542, "died": 1582, "father":null, "mother":null}', - '{"name": "Bernardus de Causmaecker", "sex": "m", "born": 1721, "died": 1789, "father": "Lieven de Causmaecker", "mother": "Livina Haverbeke"}', - '{"name": "Jacoba Lammens", "sex": "f", "born": 1699, "died": 1740, "father": "Lieven Lammens", "mother": "Livina de Vrieze"}', - '{"name": "Pieter de Decker", "sex": "m", "born": 1705, "died": 1780, "father": "Joos de Decker", "mother": "Petronella van de Steene"}', - '{"name": "Joanna de Pape", "sex": "f", "born": 1654, "died": 1723, "father": "Vincent de Pape", "mother": "Petronella Wauters"}', - '{"name": "Daniel Haverbeke", "sex": "m", "born": 1652, "died": 1723, "father": "Lieven Haverbeke", "mother": "Elisabeth Hercke"}', - '{"name": "Lieven Haverbeke", "sex": "m", "born": 1631, "died": 1676, "father": "Pieter Haverbeke", "mother": "Anna van Hecke"}', - '{"name": "Martina de Pape", "sex": "f", "born": 1666, "died": 1727, "father": "Vincent de Pape", "mother": "Petronella Wauters"}', - '{"name": "Jan Francies Haverbeke", "sex": "m", "born": 1725, "died": 1779, "father": "Pieter Bernard Haverbeke", "mother": "Livina de Vrieze"}', - '{"name": "Maria Haverbeke", 
"sex": "m", "born": 1905, "died": 1997, "father": "Emile Haverbeke", "mother": "Emma de Milliano"}', - '{"name": "Petronella de Decker", "sex": "f", "born": 1731, "died": 1781, "father": "Pieter de Decker", "mother": "Livina Haverbeke"}', - '{"name": "Livina Sierens", "sex": "f", "born": 1761, "died": 1826, "father": "Jan Sierens", "mother": "Maria van Waes"}', - '{"name": "Laurentia Haverbeke", "sex": "f", "born": 1710, "died": 1786, "father": "Jan Haverbeke", "mother": "Maria de Rycke"}', - '{"name": "Carel Haverbeke", "sex": "m", "born": 1796, "died": 1837, "father": "Pieter Antone Haverbeke", "mother": "Livina Sierens"}', - '{"name": "Elisabeth Hercke", "sex": "f", "born": 1632, "died": 1674, "father": "Willem Hercke", "mother": "Margriet de Brabander"}', - '{"name": "Jan Haverbeke", "sex": "m", "born": 1671, "died": 1731, "father": "Lieven Haverbeke", "mother": "Elisabeth Hercke"}', - '{"name": "Anna van Hecke", "sex": "f", "born": 1607, "died": 1670, "father": "Paschasius van Hecke", "mother": "Martijntken Beelaert"}', - '{"name": "Maria Sturm", "sex": "f", "born": 1835, "died": 1917, "father": "Charles Sturm", "mother": "Seraphina Spelier"}', - '{"name": "Jacobus Bernardus van Brussel", "sex": "m", "born": 1736, "died": 1809, "father": "Jan van Brussel", "mother": "Elisabeth Haverbeke"}' -].join(",\n ") + "\n]"; +var ANCESTRY_FILE = JSON.stringify([ + {"name": "Carolus Haverbeke", "sex": "m", "born": 1832, "died": 1905, "father": "Carel Haverbeke", "mother": "Maria van Brussel"}, + {"name": "Emma de Milliano", "sex": "f", "born": 1876, "died": 1956, "father": "Petrus de Milliano", "mother": "Sophia van Damme"}, + {"name": "Maria de Rycke", "sex": "f", "born": 1683, "died": 1724, "father": "Frederik de Rycke", "mother": "Laurentia van Vlaenderen"}, + {"name": "Jan van Brussel", "sex": "m", "born": 1714, "died": 1748, "father": "Jacobus van Brussel", "mother": "Joanna van Rooten"}, + {"name": "Philibert Haverbeke", "sex": "m", "born": 1907, "died": 1997, 
"father": "Emile Haverbeke", "mother": "Emma de Milliano"}, + {"name": "Jan Frans van Brussel", "sex": "m", "born": 1761, "died": 1833, "father": "Jacobus Bernardus van Brussel", "mother":null}, + {"name": "Pauwels van Haverbeke", "sex": "m", "born": 1535, "died": 1582, "father": "N. van Haverbeke", "mother":null}, + {"name": "Clara Aernoudts", "sex": "f", "born": 1918, "died": 2012, "father": "Henry Aernoudts", "mother": "Sidonie Coene"}, + {"name": "Emile Haverbeke", "sex": "m", "born": 1877, "died": 1968, "father": "Carolus Haverbeke", "mother": "Maria Sturm"}, + {"name": "Lieven de Causmaecker", "sex": "m", "born": 1696, "died": 1724, "father": "Carel de Causmaecker", "mother": "Joanna Claes"}, + {"name": "Pieter Haverbeke", "sex": "m", "born": 1602, "died": 1642, "father": "Lieven van Haverbeke", "mother":null}, + {"name": "Livina Haverbeke", "sex": "f", "born": 1692, "died": 1743, "father": "Daniel Haverbeke", "mother": "Joanna de Pape"}, + {"name": "Pieter Bernard Haverbeke", "sex": "m", "born": 1695, "died": 1762, "father": "Willem Haverbeke", "mother": "Petronella Wauters"}, + {"name": "Lieven van Haverbeke", "sex": "m", "born": 1570, "died": 1636, "father": "Pauwels van Haverbeke", "mother": "Lievijne Jans"}, + {"name": "Joanna de Causmaecker", "sex": "f", "born": 1762, "died": 1807, "father": "Bernardus de Causmaecker", "mother":null}, + {"name": "Willem Haverbeke", "sex": "m", "born": 1668, "died": 1731, "father": "Lieven Haverbeke", "mother": "Elisabeth Hercke"}, + {"name": "Pieter Antone Haverbeke", "sex": "m", "born": 1753, "died": 1798, "father": "Jan Francies Haverbeke", "mother": "Petronella de Decker"}, + {"name": "Maria van Brussel", "sex": "f", "born": 1801, "died": 1834, "father": "Jan Frans van Brussel", "mother": "Joanna de Causmaecker"}, + {"name": "Angela Haverbeke", "sex": "f", "born": 1728, "died": 1734, "father": "Pieter Bernard Haverbeke", "mother": "Livina de Vrieze"}, + {"name": "Elisabeth Haverbeke", "sex": "f", "born": 1711, 
"died": 1754, "father": "Jan Haverbeke", "mother": "Maria de Rycke"}, + {"name": "Lievijne Jans", "sex": "f", "born": 1542, "died": 1582, "father":null, "mother":null}, + {"name": "Bernardus de Causmaecker", "sex": "m", "born": 1721, "died": 1789, "father": "Lieven de Causmaecker", "mother": "Livina Haverbeke"}, + {"name": "Jacoba Lammens", "sex": "f", "born": 1699, "died": 1740, "father": "Lieven Lammens", "mother": "Livina de Vrieze"}, + {"name": "Pieter de Decker", "sex": "m", "born": 1705, "died": 1780, "father": "Joos de Decker", "mother": "Petronella van de Steene"}, + {"name": "Joanna de Pape", "sex": "f", "born": 1654, "died": 1723, "father": "Vincent de Pape", "mother": "Petronella Wauters"}, + {"name": "Daniel Haverbeke", "sex": "m", "born": 1652, "died": 1723, "father": "Lieven Haverbeke", "mother": "Elisabeth Hercke"}, + {"name": "Lieven Haverbeke", "sex": "m", "born": 1631, "died": 1676, "father": "Pieter Haverbeke", "mother": "Anna van Hecke"}, + {"name": "Martina de Pape", "sex": "f", "born": 1666, "died": 1727, "father": "Vincent de Pape", "mother": "Petronella Wauters"}, + {"name": "Jan Francies Haverbeke", "sex": "m", "born": 1725, "died": 1779, "father": "Pieter Bernard Haverbeke", "mother": "Livina de Vrieze"}, + {"name": "Maria Haverbeke", "sex": "m", "born": 1905, "died": 1997, "father": "Emile Haverbeke", "mother": "Emma de Milliano"}, + {"name": "Petronella de Decker", "sex": "f", "born": 1731, "died": 1781, "father": "Pieter de Decker", "mother": "Livina Haverbeke"}, + {"name": "Livina Sierens", "sex": "f", "born": 1761, "died": 1826, "father": "Jan Sierens", "mother": "Maria van Waes"}, + {"name": "Laurentia Haverbeke", "sex": "f", "born": 1710, "died": 1786, "father": "Jan Haverbeke", "mother": "Maria de Rycke"}, + {"name": "Carel Haverbeke", "sex": "m", "born": 1796, "died": 1837, "father": "Pieter Antone Haverbeke", "mother": "Livina Sierens"}, + {"name": "Elisabeth Hercke", "sex": "f", "born": 1632, "died": 1674, "father": "Willem 
Hercke", "mother": "Margriet de Brabander"}, + {"name": "Jan Haverbeke", "sex": "m", "born": 1671, "died": 1731, "father": "Lieven Haverbeke", "mother": "Elisabeth Hercke"}, + {"name": "Anna van Hecke", "sex": "f", "born": 1607, "died": 1670, "father": "Paschasius van Hecke", "mother": "Martijntken Beelaert"}, + {"name": "Maria Sturm", "sex": "f", "born": 1835, "died": 1917, "father": "Charles Sturm", "mother": "Seraphina Spelier"}, + {"name": "Jacobus Bernardus van Brussel", "sex": "m", "born": 1736, "died": 1809, "father": "Jan van Brussel", "mother": "Elisabeth Haverbeke"} +]) // This makes sure the data is exported in node.js — // `require(./path/to/ancestry.js)` will get you the array. From bcdd03d1f11ce5daca19d9f45c82f88baa011884 Mon Sep 17 00:00:00 2001 From: Simon Morgan Date: Wed, 24 May 2017 12:24:06 +0100 Subject: [PATCH 003/780] require should be passed a string --- code/ancestry.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/ancestry.js b/code/ancestry.js index 3f5930a19..265db937b 100644 --- a/code/ancestry.js +++ b/code/ancestry.js @@ -41,6 +41,6 @@ var ANCESTRY_FILE = JSON.stringify([ ]) // This makes sure the data is exported in node.js — -// `require(./path/to/ancestry.js)` will get you the array. +// `require("./path/to/ancestry.js")` will get you the array. if (typeof module != "undefined" && module.exports) module.exports = ANCESTRY_FILE; From 25cb92108f3df62107b754cff62f304e8677227a Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Tue, 30 May 2017 10:15:48 +0200 Subject: [PATCH 004/780] Update backer link --- html/backers.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/html/backers.html b/html/backers.html index a0a1f4ec8..eb6abfc57 100644 --- a/html/backers.html +++ b/html/backers.html @@ -40,7 +40,7 @@

List of Backers
Magnus Skog + Magnus Skog 1000 € From 1e34bbe2ac90ec7b8897836e1280a5f39b98c034 Mon Sep 17 00:00:00 2001 From: CodeVision Date: Mon, 19 Jun 2017 22:41:51 +0200 Subject: [PATCH 005/780] Fix typo in suggested return value --- code/solutions/11_3_comments.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/solutions/11_3_comments.js b/code/solutions/11_3_comments.js index 5e18263e1..58401e145 100644 --- a/code/solutions/11_3_comments.js +++ b/code/solutions/11_3_comments.js @@ -8,5 +8,5 @@ console.log(parse("# hello\nx")); console.log(parse("a # one\n # two\n()")); // → {type: "apply", -// operator: {type: "word", name: "x"}, +// operator: {type: "word", name: "a"}, // args: []} From 8c0d0235a7ada65bbfebdfb803c67cbe77dcebe5 Mon Sep 17 00:00:00 2001 From: jtmcgrath Date: Sun, 14 May 2017 23:03:30 +0300 Subject: [PATCH 006/780] Shorter regex for problem #7. --- code/solutions/09_1_regexp_golf.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/solutions/09_1_regexp_golf.js b/code/solutions/09_1_regexp_golf.js index 6848f0f18..816d4dced 100644 --- a/code/solutions/09_1_regexp_golf.js +++ b/code/solutions/09_1_regexp_golf.js @@ -24,7 +24,7 @@ verify(/\w{7,}/, ["hottentottententen"], ["no", "hotten totten tenten"]); -verify(/\b[a-df-z]+\b/i, +verify(/\b[^\We]+\b/i, ["red platypus", "wobbling nest"], ["earth bed", "learning ape"]); From 5e06d2810197d03b39c672114bbfdac75a7f454e Mon Sep 17 00:00:00 2001 From: Harry Manchanda Date: Sun, 25 Jun 2017 22:42:23 +0530 Subject: [PATCH 007/780] Example is confusing so change it a bit --- 04_data.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/04_data.txt b/04_data.txt index 1a7072e14..fd9e77b00 100644 --- a/04_data.txt +++ b/04_data.txt @@ -97,10 +97,10 @@ is called an _array_ and is written as a list of values between [source,javascript] ---- var listOfNumbers = [2, 3, 5, 7, 11]; -console.log(listOfNumbers[1]); +console.log(listOfNumbers[2]); +// → 5 
+console.log(listOfNumbers[2 - 1]); // → 3 -console.log(listOfNumbers[1 - 1]); -// → 2 ---- ((([] (subscript))))(((array,indexing)))The notation for getting From bb9515e2fde86b78ced5d72fab4213d1eec427a3 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Mon, 3 Jul 2017 11:30:15 +0200 Subject: [PATCH 008/780] Fix code sandbox when HTML output is used --- html/js/code.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/html/js/code.js b/html/js/code.js index 8dbf97f34..56b29dbf2 100644 --- a/html/js/code.js +++ b/html/js/code.js @@ -171,7 +171,10 @@ addEventListener("load", function() { sandbox = new SandBox({ loadFiles: hasIncludes(val, context.include) ? [] : context.include, place: type == "html" && - function(node) { outnode.parentNode.insertBefore(node, outnode); } + function(node) { + var out = document.querySelector(".sandbox-output"); + out.parentNode.insertBefore(node, out); + } }, function(box) { output.clear(); if (type == "html") From af8852d3a642546ef1d2631744a2e36ff93460a2 Mon Sep 17 00:00:00 2001 From: Harry Manchanda Date: Wed, 2 Aug 2017 23:56:10 +0530 Subject: [PATCH 009/780] Change `start` to `current` As start is semantically wrong... Its the `current` number! 
--- 03_functions.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/03_functions.txt b/03_functions.txt index d7aed41bb..5c28d56e3 100644 --- a/03_functions.txt +++ b/03_functions.txt @@ -617,14 +617,14 @@ Here is a recursive solution: [source,javascript] ---- function findSolution(target) { - function find(start, history) { - if (start == target) + function find(current, history) { + if (current == target) return history; - else if (start > target) + else if (current > target) return null; else - return find(start + 5, "(" + history + " + 5)") || - find(start * 3, "(" + history + " * 3)"); + return find(current + 5, "(" + history + " + 5)") || + find(current * 3, "(" + history + " * 3)"); } return find(1, "1"); } From 5a039bc9195199bdbd7f5e74cbcd7a7a87c4d672 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Sun, 3 Sep 2017 13:47:28 +0200 Subject: [PATCH 010/780] Fix additional hole in solution to exercise 20.2 Issue #226 --- 20_node.txt | 15 +++++++-------- code/solutions/20_2_fixing_a_leak.js | 10 ++++++++-- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/20_node.txt b/20_node.txt index 5aa407a15..0b3dcc3ac 100644 --- a/20_node.txt +++ b/20_node.txt @@ -1146,14 +1146,13 @@ system might be used to do bad things to your machine. (((replace method)))(((file server example)))(((leak)))(((period character)))(((slash character)))(((backslash character)))(((decodeURIComponent function)))It is enough to strip out -all occurrences of two dots that have a slash, a backslash, or -the end of the string on both sides. Using the `replace` method with a -((regular expression)) is the easiest way to do this. Do not forget -the `g` flag on the expression, or `replace` will replace only a -single instance, and people could still get around this safety measure -by including additional double dots in their paths! 
Also make sure you -do the replace _after_ decoding the string, or it would be possible to -foil the check by encoding a dot or a slash. +all occurrences of two dots that have a slash, a backslash, or the end +of the string on both sides. Using the `replace` method with a +((regular expression)) is the easiest way to do this. But since such +instances may overlap (as in `"/../../f"`), you may have to apply +`replace` multiple times, until the string no longer changes. Also +make sure you do the replace _after_ decoding the string, or it would +be possible to foil the check by encoding a dot or a slash. (((path,file system)))(((slash character)))Another potentially worrying case is when paths start with a slash, which are interpreted as diff --git a/code/solutions/20_2_fixing_a_leak.js b/code/solutions/20_2_fixing_a_leak.js index e66f85089..abbcfc865 100644 --- a/code/solutions/20_2_fixing_a_leak.js +++ b/code/solutions/20_2_fixing_a_leak.js @@ -3,6 +3,12 @@ function urlToPath(url) { var path = require("url").parse(url).pathname; - var decoded = decodeURIComponent(path); - return "." + decoded.replace(/(\/|\\)\.\.(\/|\\|$)/g, "/"); + var result = "." 
+ decodeURIComponent(path); + for (;;) { + // Remove any instances of '/../' or similar + var simplified = result.replace(/(\/|\\)\.\.(\/|\\|$)/, "/"); + // Keep doing this until it no longer changes the string + if (simplified == result) return result + result = simplified + } } From 8e4a37d57fa0744398cf9355c6237a39b2e34fdb Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Wed, 13 Sep 2017 16:20:37 +0200 Subject: [PATCH 011/780] Work around asciidoc unicode awfulness Closes #227 --- 20_node.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/20_node.txt b/20_node.txt index 0b3dcc3ac..2ecef921a 100644 --- a/20_node.txt +++ b/20_node.txt @@ -854,8 +854,8 @@ function respondErrorOrNothing(respond) { ---- (((204 (HTTP status code))))(((body (HTTP))))When an ((HTTP)) -((response)) does not contain any data, the status code 204 (“no -content”) can be used to indicate this. Since we need to provide +((response)) does not contain any data, the status code 204 +“no content” can be used to indicate this. Since we need to provide callbacks that either report an error or return a 204 response in a few different situations, I wrote a `respondErrorOrNothing` function that creates such a callback. From 6676423968989d5b30979eeadfdddd2988cda0d2 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Wed, 13 Sep 2017 16:29:34 +0200 Subject: [PATCH 012/780] Slightly tweak chapter 20 to avoid asciidoc bug --- 20_node.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/20_node.txt b/20_node.txt index 2ecef921a..a21ac6ed0 100644 --- a/20_node.txt +++ b/20_node.txt @@ -855,10 +855,10 @@ function respondErrorOrNothing(respond) { (((204 (HTTP status code))))(((body (HTTP))))When an ((HTTP)) ((response)) does not contain any data, the status code 204 -“no content” can be used to indicate this. 
Since we need to provide -callbacks that either report an error or return a 204 response in a -few different situations, I wrote a `respondErrorOrNothing` function -that creates such a callback. + (“no content”) can be used to indicate this. Since we need to +provide callbacks that either report an error or return a 204 response +in a few different situations, I wrote a `respondErrorOrNothing` +function that creates such a callback. (((file server example)))(((Node.js)))(((PUT method)))This is the handler for `PUT` requests: From 895c061b8c807d7d17732db8343db4823d0a6292 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Sun, 17 Sep 2017 18:35:04 +0200 Subject: [PATCH 013/780] Install a specific version of mime in node example Since version 2 isn't compatible with the example code Issue #228 --- 20_node.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/20_node.txt b/20_node.txt index a21ac6ed0..8cd98abad 100644 --- a/20_node.txt +++ b/20_node.txt @@ -746,7 +746,7 @@ where the server script lives, you'll be able to use `require("mime")` to get access to the library: ---- -$ npm install mime +$ npm install mime@1.4.0 npm http GET https://registry.npmjs.org/mime npm http 304 https://registry.npmjs.org/mime mime@1.2.11 node_modules/mime From 534d080f74db7f1e9264a406605c3040456c0c07 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Sun, 17 Sep 2017 18:37:13 +0200 Subject: [PATCH 014/780] Fix example output of npm install command Issue #228 --- 20_node.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/20_node.txt b/20_node.txt index 8cd98abad..4c2f715cd 100644 --- a/20_node.txt +++ b/20_node.txt @@ -749,7 +749,7 @@ get access to the library: $ npm install mime@1.4.0 npm http GET https://registry.npmjs.org/mime npm http 304 https://registry.npmjs.org/mime -mime@1.2.11 node_modules/mime +mime@1.4.0 node_modules/mime ---- (((404 (HTTP status code))))(((stat function)))When a requested file From 
19ae586d9a8836662aa0d1d17206950205e75594 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Tue, 26 Sep 2017 17:27:23 +0200 Subject: [PATCH 015/780] Upgrade dependencies --- .gitignore | 1 + 15_game.txt | 3 ++- Makefile | 4 ++-- bin/run_tests.js | 3 ++- package.json | 14 ++++++-------- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index bee009786..68fd4a51e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ /book_mobile.pdf /html/[012]*.html /html/js/chapter_info.js +/html/js/[012]*.js /html/js/acorn_codemirror.js /code/chapter/* /code/file_server.js diff --git a/15_game.txt b/15_game.txt index cc00268ec..bdcd294d8 100644 --- a/15_game.txt +++ b/15_game.txt @@ -219,7 +219,8 @@ Level.prototype.isFinished = function() { == Actors == -[[vector]] (((Vector type)))(((coordinates)))To store the position and +[[vector]] +(((Vector type)))(((coordinates)))To store the position and size of an actor, we will return to our trusty `Vector` type, which groups an x-coordinate and a y-coordinate into an object. diff --git a/Makefile b/Makefile index 404598d08..dc6719e50 100644 --- a/Makefile +++ b/Makefile @@ -21,8 +21,8 @@ html/js/acorn_codemirror.js: node_modules/codemirror/lib/codemirror.js \ node_modules/codemirror/mode/xml/xml.js \ node_modules/codemirror/mode/htmlmixed/htmlmixed.js \ node_modules/codemirror/addon/edit/matchbrackets.js \ - node_modules/acorn/acorn.js \ - node_modules/acorn/util/walk.js + node_modules/acorn/dist/acorn.js \ + node_modules/acorn/dist/walk.js node_modules/.bin/uglifyjs $? 
-m -o $@ img/generated/%.png: img/%.svg diff --git a/bin/run_tests.js b/bin/run_tests.js index c6291dab7..9ab4c8703 100644 --- a/bin/run_tests.js +++ b/bin/run_tests.js @@ -241,7 +241,8 @@ function nextSandbox() { report(e); } } else { - require("jsdom").env({ + let {JSDOM} = require("jsdom") + new JSDOM({ url: "http://eloquentjavascript.net/" + file + "#" + i, html: sandbox.html || "", src: [baseCode], diff --git a/package.json b/package.json index e876ae315..a84ba187f 100644 --- a/package.json +++ b/package.json @@ -9,16 +9,14 @@ "url": "git://github.com/marijnh/Eloquent-JavaScript.git" }, "dependencies": { - "acorn": "0.3", - "codemirror": "4", + "acorn": "^5.1.0", + "codemirror": "^5.25.0", "jszip": "^2.5.0", - "uglify-js": "2" + "uglify-js": "^2.0.0" }, "devDependencies": { - "contextify": "kkoopa/contextify", - "jsdom": "0.10", - "canvas": "git://github.com/learnboost/node-canvas", - "promise": "6", - "jszip": "2" + "canvas": "^1.6.7", + "jsdom": "^11.2.0", + "promise": "^8.0.1" } } From c02fe4238c8e12db281c4a4202e9cd79b5c92a12 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Tue, 26 Sep 2017 23:06:29 +0200 Subject: [PATCH 016/780] Start adding tooling to work with Markdown instead of Asciidoc --- package.json | 2 + src/chapter.html | 35 ++++++++++++ src/convert.js | 67 ++++++++++++++++++++++ src/markdown.js | 140 +++++++++++++++++++++++++++++++++++++++++++++ src/pseudo_json.js | 101 ++++++++++++++++++++++++++++++++ src/render_html.js | 80 ++++++++++++++++++++++++++ 6 files changed, 425 insertions(+) create mode 100644 src/chapter.html create mode 100644 src/convert.js create mode 100644 src/markdown.js create mode 100644 src/pseudo_json.js create mode 100644 src/render_html.js diff --git a/package.json b/package.json index a84ba187f..4bb5640a3 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,9 @@ "dependencies": { "acorn": "^5.1.0", "codemirror": "^5.25.0", + "hjson": "^3.1.0", "jszip": "^2.5.0", + "markdown-it": "^8.4.0", "uglify-js": "^2.0.0" 
}, "devDependencies": { diff --git a/src/chapter.html b/src/chapter.html new file mode 100644 index 000000000..5c351106f --- /dev/null +++ b/src/chapter.html @@ -0,0 +1,35 @@ + + + + + <<t $in.title>> :: Eloquent JavaScript + + + + + + <> + + <> + + +
+ + +> id="<>"<>><>
Chapter <>
<
><>

+ +<> + + + diff --git a/src/convert.js b/src/convert.js new file mode 100644 index 000000000..d05349ec9 --- /dev/null +++ b/src/convert.js @@ -0,0 +1,67 @@ +let fs = require("fs") + +let text = fs.readFileSync(process.argv[2], "utf8") + +function processIndexTerm(term) { + term = term.replace(/\+\+\+,\+\+\+/g, "×") + let terms = term.split(",").map(t => /\W/.test(t) ? JSON.stringify(t.replace(/×/g, ",").replace(/\s/g, " ")) : t) + return terms.length == 1 ? terms[0] : "[" + terms.join(", ") + "]" +} + +text = text + .replace(/^(:\w+:\s*.+\n)+/, function(meta) { + let re = /(?:^|\n):(\w+):\s*(.+)/g, m + let tag = "{{meta" + while (m = re.exec(meta)) + tag += ", " + m[1] + ": " + m[2] + return tag + "}}\n" + }) + .replace(/\n(=+) (.*?) =+\n/g, function(_, depth, title) { + return "\n" + "#".repeat(depth.length) + " " + title + "\n" + }) + .replace(/\nimage::([^\]]+)\[(.*?)\]/g, function(_, url, meta) { + return "\n{{figure, url: " + JSON.stringify(url) + ", " + meta.replace(/="/g, ": \"") + "}}" + }) + .replace(/\n(\[chapterquote=.*?\]\n)?\[quote, ([^\]]+)\]\n____\n([^]*?)____\n/g, function(_, chapter, author, content) { + let match = /([^,]+), (.+)/.exec(author), source = null + if (match) { source = match[2]; author = match[1] } + return "\n{{quote" + (chapter ? ", chapter: true" : "") + ", author: " + JSON.stringify(author) + + (source ? 
", source: " + JSON.stringify(source) : "") + "\n\n" + content + "\n}}\n" + }) + .replace(/\n\n+((?:(?!\n\n)[^])*?\(\(\((?:(?!\n\n)[^])*)/g, function(_, para) { + let terms = [] + para = para.replace(/\(\(\(((?:\([^\)]*\)|[^])*?)\)\)\)/g, function(_, content) { + terms.push(content) + return "" + }).replace(/^\s*/, "") + return "\n\n{{index " + terms.map(processIndexTerm).join(", ") + "}}\n\n" + para + }) + .replace(/\bindexsee:\[(.*?),\s*(.*?)\]\s*/g, function(_, term, ref) { + return "{{indexsee " + processIndexTerm(term) + ", " + processIndexTerm(ref) + "}}\n\n" + }) + .replace(/\n(?:\[sandbox="(.*?)"\]\n)?(?:\[source,(.*?)\]\n)?(\[focus=.*?\]\n)?(?:---+|\+\+\++)\n([^]*?)\n(?:---+|\+\+\++)\n/g, function(_, sandbox, type, focus, content) { + let params = [] + if (type != "javascript") params.push(type || "null") + if (focus) params.push("focus") + if (sandbox) params.push("sandbox-" + sandbox) + return "\n```" + params.join(" ") + "\n" + content + "\n```\n" + }) + .replace(/\n\/\/ (?:(start_code)|test: (.*)|include_code (.*))/g, function(_, startCode, test, includeCode) { + if (startCode) return "\n{{startCode}}" + if (test) return "\n{{test " + test + "}}" + return "\n{{includeCode " + JSON.stringify(includeCode) + "}}" + }) + .replace(/\blink:([^\[]+)\[(.*?)\]/g, function(_, url, content) { + return "[" + content + "](" + url + ")" + }) + .replace(/\nifdef::(\w+?)_target\[\]\n([^]*?)\nendif::.*/g, function(_, type, content) { + return "\n{{if " + type + "\n" + content + "\n}}" + }) + .replace(/\+\+(?! 
|\))((?:(?!\n\n)[^])+)\+\+/g, function(_, text) { + return "_" + text + "_" + }) + .replace(/__((?:(?!\n\n)[^])+)__/g, function(_, text) { + return "_" + text + "_" + }) + +console.log(text) diff --git a/src/markdown.js b/src/markdown.js new file mode 100644 index 000000000..95fc43153 --- /dev/null +++ b/src/markdown.js @@ -0,0 +1,140 @@ +const PJSON = require("./pseudo_json") +const markdownIt = require("markdown-it") + +function parseData(str) { + let tag = /^\s*(\w+)\s*(,\s*)?/.exec(str), obj + if (!tag) return null + if (tag[0].length == str.length) { + obj = {} + } else if (tag[2]) { + try { obj = PJSON.parse("{" + str.slice(tag[0].length) + "}") } + catch(_) { return null } + } else { + obj = {} + try { obj.args = PJSON.parse("[" + str.slice(tag[0].length) + "]") } + catch(_) { return null } + } + obj._ = tag[1] + return obj +} + +function parseBlockMeta(state, startLine, endLine) { + let pos = state.bMarks[startLine] + state.tShift[startLine] + let max = state.eMarks[startLine] + // Check for code block indentation or end of input + if (state.sCount[startLine] - state.blkIndent >= 4 || pos + 4 > max) return false + + // Test for `{{` opening marker + if (state.src.charCodeAt(pos) != 123 || state.src.charCodeAt(pos + 1) != 123) return false + + let content = state.src.slice(pos + 2, max), single + + if (single = /\}\}\s*/.exec(content)) { + let data = parseData(content.slice(0, single.index)) + if (!data) return false + let token = state.push("meta", null, 0) + token.map = [startLine, startLine + 1] + token.attrs = [["data", data]] + state.line++ + return true + } + + let data = parseData(content) + if (!data) return false + + let line = startLine + 1, depth = 0 + for (; line < endLine; line++) { + if (line == endLine) throw new SyntaxError("Unterminated meta block") + let start = state.bMarks[line] + state.tShift[line] + let after = state.src.slice(start, start + 2) + if (after == "{{" && !/\}\}\s*$/.test(state.src.slice(start, state.eMarks[line]))) depth++ 
+ else if (after == "}}") { + if (depth) depth-- + else break + } + } + + let token = state.push("meta_open", null, 1) + token.map = [startLine, line + 1] + token.attrs = [["data", data]] + state.md.block.tokenize(state, startLine + 1, line) + state.push("meta_close", null, -1) + state.line = line + 1 + + return true +} + +function parseInlineMeta(state) { + if (state.src.charCodeAt(state.pos) != 91) return false // '[' + + let max = state.posMax + let end = state.md.helpers.parseLinkLabel(state, state.pos, false) + if (end < 0) return false + + let pos = end + 1 + if (pos >= max || state.src.charCodeAt(pos) != 123) return false // '{' + + let metaEnd = pos + 1, depth = 0 + for (;; metaEnd++) { + if (metaEnd == max) return false + let code = state.src.charCodeAt(metaEnd) + if (code == 125) { // '}' + if (depth) depth-- + else break + } else if (code == 123) { + depth++ + } + } + + let data = parseData(state.src.slice(pos + 1, metaEnd)) + if (!data) return false + + state.pos++ + state.posMax = end + state.push("meta_open", null, 1).attrs = [["data", data]] + state.md.inline.tokenize(state) + state.push("meta_close", null, -1) + state.pos = metaEnd + 1 + state.posMax = max + + return true +} + +function parseIndexTerm(state) { + let max = state.posMax + // Check for opening '((' + if (state.pos >= max + 4 || state.src.charCodeAt(state.pos) != 40 || state.src.charCodeAt(state.pos + 1) != 40) return false + + let start = state.pos + 2, end = start + for (;; end++) { + if (end >= max - 1) return false + if (state.src.charCodeAt(end) == 41 && state.src.charCodeAt(end + 1)) break + } + + let term = state.src.slice(start, end) + + state.push("meta", null, 0).attrs = [["data", {_: "index", args: [term]}]] + state.pending += term + state.pos = end + 2 + return true +} + +let TERMINATOR_RE = /[\n!#$%&*+\-:<=>@[\\\]^_`{}~]|\(\(/ + +function newText(state) { + let len = state.src.slice(state.pos).search(TERMINATOR_RE) + if (len == 0) return false + if (len == -1) len = 
state.src.length - state.pos + state.pending += state.src.slice(state.pos, state.pos + len) + state.pos += len + return true +} + +function plugin(md) { + md.block.ruler.before("code", "meta", parseBlockMeta) + md.inline.ruler.before("link", "meta", parseInlineMeta) + md.inline.ruler.at("text", newText) + md.inline.ruler.before("strikethrough", "index_term", parseIndexTerm) +} + +module.exports = markdownIt().use(plugin) diff --git a/src/pseudo_json.js b/src/pseudo_json.js new file mode 100644 index 000000000..559b5acee --- /dev/null +++ b/src/pseudo_json.js @@ -0,0 +1,101 @@ +class Stream { + constructor(str) { + this.str = str + this.pos = 0 + } + + err(msg) { + throw new SyntaxError(msg + " at " + this.pos + " in " + JSON.stringify(this.str)) + } + + space() { + for (;;) { + let next = this.next + if (next == 32 || next == 9 || next == 10 || next == 13) this.pos++ + else break + } + } + + get next() { + return this.str.charCodeAt(this.pos) + } + + ahead(n) { + this.pos += n + this.space() + } +} + +exports.parse = function(str) { + let stream = new Stream(str) + stream.space() + let value = parseValue(stream) + if (stream.pos != stream.str.length) stream.err("Extra characters at end of input") + return value +} + +function parseValue(stream) { + let next = stream.next + if (next == 123) return parseObj(stream) + if (next == 91) return parseArr(stream) + if (next == 34) return parseStr(stream) + return parseWord(stream) +} + +function parseObj(stream) { + stream.ahead(1) + let obj = {} + for (;;) { + if (stream.next == 125) break + let prop = parseWord(stream, true) + if (stream.next != 58) stream.err("Expected ':'") + stream.ahead(1) + obj[prop] = parseValue(stream) + if (stream.next == 44) stream.ahead(1) + } + stream.ahead(1) + return obj +} + +function parseArr(stream) { + stream.ahead(1) + let arr = [] + for (;;) { + if (stream.next == 93) break + arr.push(parseValue(stream)) + if (stream.next == 44) stream.ahead(1) + } + stream.ahead(1) + return arr +} + 
+function parseStr(stream) { + let start = stream.pos + stream.pos++ + for (let escaped = false;;) { + let next = stream.next + stream.pos++ + if (next == 34 && !escaped) break + else if (isNaN(next)) stream.error("Unterminated string") + escaped = next == 92 + } + stream.space() + return JSON.parse(stream.str.slice(start, stream.pos)) +} + +function parseWord(stream, prop) { + let start = stream.pos + for (;;) { + let next = stream.next + if ((next >= 97 && next <= 122) || (next >= 65 && next <= 90) || next == 95 || (next >= 48 && next <= 57)) stream.pos++ + else break + } + let word = stream.str.slice(start, stream.pos) + stream.space() + if (/^(?:0x[\da-f]+|\d*(?:\.\d*)?(?:[eE][+\-]?\d+)?)$/i.test(word)) return JSON.parse(word) + if (!prop) { + if (word == "true") return true + if (word == "false") return false + } + return word +} diff --git a/src/render_html.js b/src/render_html.js new file mode 100644 index 000000000..0185c34b7 --- /dev/null +++ b/src/render_html.js @@ -0,0 +1,80 @@ +let fs = require("fs"), mold = new (require("mold")) + +let tokens = require("./markdown").parse(fs.readFileSync(process.argv[2], "utf8"), {}) + +function escapeChar(ch) { + return ch == "<" ? "<" : ch == ">" ? ">" : ch == "&" ? "&" : """ +} +function escape(str) { return str.replace(/[<>&"]/g, escapeChar) } + +let renderer = { + code_inline(token) { return `${escape(token.content)}` }, + + // FIXME languages + fence(token) { return `\n\n
${escape(token.content)}
` }, + + hardbreak() { return "
" }, + + softbreak() { return " " }, + + text(token) { return escape(token.content) }, + + paragraph_open() { return "\n\n

" }, + + paragraph_close() { return "

" }, + + heading_open(token) { return `\n\n<${token.tag}>` }, + + heading_close(token) { return `` }, + + strong_open() { return "" }, + + strong_close() { return "" }, + + em_open() { return "" }, + + em_close() { return "" }, + + link_open(token) { + let alt = token.attrGet("alt"), href= token.attrGet("href") + return `` + }, + + link_close() { return "" }, + + inline(token) { return renderArray(token.children) }, + + meta() { return "" }, + meta_open() { return "" }, + meta_close() { return "" } +} + +function render(token) { + let f = renderer[token.type] + if (!f) throw new Error("No render function for " + token.type) + return f(token) +} + +function renderArray(tokens) { + let result = "" + for (let i = 0; i < tokens.length; i++) result += render(tokens[i]) + return result +} + +let args = {} +for (let i = 0; i < tokens.length; i++) { + let tok = tokens[i] + if (tok.type == "meta" && tok.attrGet("data")._ == "meta") { + let data = tok.attrGet("data") + for (let prop in data) args[prop] = data[prop] + } else if (tok.tag == "h1") { + if (tokens[i + 2].tag != "h1") throw new Error("Complex H1 not supported") + args.title = tokens[i + 1].children[0].content + tokens.splice(i--, 3) + } +} +args.content = renderArray(tokens) + +let template = mold.bake("chapter", fs.readFileSync(__dirname + "/chapter.html", "utf8")) + +console.log(template(args)) From 486e9371f3b4bfc2729f6f9356990b454ba4a404 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Tue, 26 Sep 2017 23:27:49 +0200 Subject: [PATCH 017/780] Add hash ids and syntax highlighting to html renderer --- src/render_html.js | 61 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/src/render_html.js b/src/render_html.js index 0185c34b7..e51168921 100644 --- a/src/render_html.js +++ b/src/render_html.js @@ -1,4 +1,9 @@ let fs = require("fs"), mold = new (require("mold")) +let CodeMirror = require("codemirror/addon/runmode/runmode.node.js") 
+require("codemirror/mode/javascript/javascript.js") +require("codemirror/mode/xml/xml.js") +require("codemirror/mode/css/css.js") +require("codemirror/mode/htmlmixed/htmlmixed.js") let tokens = require("./markdown").parse(fs.readFileSync(process.argv[2], "utf8"), {}) @@ -7,11 +12,50 @@ function escapeChar(ch) { } function escape(str) { return str.replace(/[<>&"]/g, escapeChar) } +function hashContent(token, firstLast) { + let text = "" + if (token.children) { + for (let i = 0; i < token.children.length; i++) + if (token.children[i].type == "text") text += token.children[i].content + } else { + text = token.content + } + if (firstLast) text = startAndEnd(text) + + let sum = require("crypto").createHash("sha1") + sum.update(text) + return sum.digest("base64").slice(0, 10) +} + +function startAndEnd(text) { + var words = text.split(/\W+/); + if (!words[0]) words.shift(); + if (!words[words.length - 1]) words.pop(); + if (words.length <= 6) return words.join(" "); + return words.slice(0, 3).join(" ") + " " + words.slice(words.length - 3).join(" "); +} + +function highlight(lang, text) { + if (lang == "html") lang = "text/html" + let result = "" + CodeMirror.runMode(text, lang, (text, style) => { + let esc = escape(text) + result += style ? `${esc}` : esc + }) + return result +} + let renderer = { code_inline(token) { return `${escape(token.content)}` }, - // FIXME languages - fence(token) { return `\n\n
${escape(token.content)}
` }, + fence(token) { + let focus = false, sandbox = null, lang = token.info.replace(/\s*\b(focus|sandbox-\w+)\b/g, (_, word) => { + if (word == "focus") focus = true + else sandbox = word.slice(8) + return "" + }) || "javascript" + return `\n\n
${highlight(lang, token.content)}
` + }, hardbreak() { return "
" }, @@ -19,11 +63,11 @@ let renderer = { text(token) { return escape(token.content) }, - paragraph_open() { return "\n\n

" }, + paragraph_open(token, array, index) { return `\n\n

` }, paragraph_close() { return "

" }, - heading_open(token) { return `\n\n<${token.tag}>` }, + heading_open(token, array, index) { return `\n\n<${token.tag} id="h_${hashContent(array[index + 1])}">` }, heading_close(token) { return `` }, @@ -50,14 +94,15 @@ let renderer = { } function render(token) { - let f = renderer[token.type] - if (!f) throw new Error("No render function for " + token.type) - return f(token) } function renderArray(tokens) { let result = "" - for (let i = 0; i < tokens.length; i++) result += render(tokens[i]) + for (let i = 0; i < tokens.length; i++) { + let token = tokens[i], f = renderer[token.type] + if (!f) throw new Error("No render function for " + token.type) + result += f(token, tokens, i) + } return result } From 597e48d4fb3425488452c09fcfd0b7274990f182 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Thu, 28 Sep 2017 15:34:21 +0200 Subject: [PATCH 018/780] Improve HTML rendering, supporting most meta declarations --- src/chapter.html | 14 +++---- src/convert.js | 10 +++-- src/markdown.js | 30 ++++++------- src/render_html.js | 78 ++++++++++++++-------------------- src/transform.js | 102 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 162 insertions(+), 72 deletions(-) create mode 100644 src/transform.js diff --git a/src/chapter.html b/src/chapter.html index 5c351106f..31443cb80 100644 --- a/src/chapter.html +++ b/src/chapter.html @@ -10,17 +10,17 @@ <> <> diff --git a/src/convert.js b/src/convert.js index d05349ec9..b4dc77dc0 100644 --- a/src/convert.js +++ b/src/convert.js @@ -23,10 +23,10 @@ text = text return "\n{{figure, url: " + JSON.stringify(url) + ", " + meta.replace(/="/g, ": \"") + "}}" }) .replace(/\n(\[chapterquote=.*?\]\n)?\[quote, ([^\]]+)\]\n____\n([^]*?)____\n/g, function(_, chapter, author, content) { - let match = /([^,]+), (.+)/.exec(author), source = null - if (match) { source = match[2]; author = match[1] } + let match = /([^,]+), (.+)/.exec(author), title = null + if (match) { title = match[2]; author = match[1] } return 
"\n{{quote" + (chapter ? ", chapter: true" : "") + ", author: " + JSON.stringify(author) + - (source ? ", source: " + JSON.stringify(source) : "") + "\n\n" + content + "\n}}\n" + (title ? ", title: " + JSON.stringify(title) : "") + "\n\n" + content + "\n}}\n" }) .replace(/\n\n+((?:(?!\n\n)[^])*?\(\(\((?:(?!\n\n)[^])*)/g, function(_, para) { let terms = [] @@ -63,5 +63,9 @@ text = text .replace(/__((?:(?!\n\n)[^])+)__/g, function(_, text) { return "_" + text + "_" }) + .replace(/\n\[\[(.*?)\]\]\n/g, function(_, name) { + return `\n{{id ${/\W/.test(name) ? JSON.stringify(name) : name}}}\n` + }) + .replace(/\[sic]/, "\\[sic]") console.log(text) diff --git a/src/markdown.js b/src/markdown.js index 95fc43153..bc60a72eb 100644 --- a/src/markdown.js +++ b/src/markdown.js @@ -32,9 +32,9 @@ function parseBlockMeta(state, startLine, endLine) { if (single = /\}\}\s*/.exec(content)) { let data = parseData(content.slice(0, single.index)) if (!data) return false - let token = state.push("meta", null, 0) + let token = state.push("meta_" + data._, null, 0) token.map = [startLine, startLine + 1] - token.attrs = [["data", data]] + token.data = data state.line++ return true } @@ -54,17 +54,17 @@ function parseBlockMeta(state, startLine, endLine) { } } - let token = state.push("meta_open", null, 1) + let token = state.push("meta_" + data._ + "_open", null, 1) token.map = [startLine, line + 1] - token.attrs = [["data", data]] + token.data = data state.md.block.tokenize(state, startLine + 1, line) - state.push("meta_close", null, -1) + state.push("meta_" + data._ + "_close", null, -1).data = data state.line = line + 1 return true } -function parseInlineMeta(state) { +function parseInlineMeta(state, silent) { if (state.src.charCodeAt(state.pos) != 91) return false // '[' let max = state.posMax @@ -91,16 +91,16 @@ function parseInlineMeta(state) { state.pos++ state.posMax = end - state.push("meta_open", null, 1).attrs = [["data", data]] + if (!silent) state.push("meta_" + data._ + 
"_open", null, 1).data = data state.md.inline.tokenize(state) - state.push("meta_close", null, -1) + if (!silent) state.push("meta_" + data._ + "_close", null, -1).data = data state.pos = metaEnd + 1 state.posMax = max return true } -function parseIndexTerm(state) { +function parseIndexTerm(state, silent) { let max = state.posMax // Check for opening '((' if (state.pos >= max + 4 || state.src.charCodeAt(state.pos) != 40 || state.src.charCodeAt(state.pos + 1) != 40) return false @@ -113,19 +113,19 @@ function parseIndexTerm(state) { let term = state.src.slice(start, end) - state.push("meta", null, 0).attrs = [["data", {_: "index", args: [term]}]] + if (!silent) state.push("meta_index", null, 0).data = {_: "index", args: [term]} state.pending += term state.pos = end + 2 return true } -let TERMINATOR_RE = /[\n!#$%&*+\-:<=>@[\\\]^_`{}~]|\(\(/ +let TERMINATOR_RE = /[\n!#$%&*+\-:<=>@\[\\\]^_`{}~]|\(\(/ -function newText(state) { - let len = state.src.slice(state.pos).search(TERMINATOR_RE) +function newText(state, silent) { + let len = state.src.slice(state.pos, state.posMax).search(TERMINATOR_RE) if (len == 0) return false - if (len == -1) len = state.src.length - state.pos - state.pending += state.src.slice(state.pos, state.pos + len) + if (len == -1) len = state.posMax - state.pos + if (!silent) state.pending += state.src.slice(state.pos, state.pos + len) state.pos += len return true } diff --git a/src/render_html.js b/src/render_html.js index e51168921..6ae486618 100644 --- a/src/render_html.js +++ b/src/render_html.js @@ -1,40 +1,22 @@ let fs = require("fs"), mold = new (require("mold")) +let {transformTokens} = require("./transform") let CodeMirror = require("codemirror/addon/runmode/runmode.node.js") require("codemirror/mode/javascript/javascript.js") require("codemirror/mode/xml/xml.js") require("codemirror/mode/css/css.js") require("codemirror/mode/htmlmixed/htmlmixed.js") -let tokens = require("./markdown").parse(fs.readFileSync(process.argv[2], "utf8"), {}) 
+let {tokens, metadata} = transformTokens(require("./markdown").parse(fs.readFileSync(process.argv[2], "utf8"), {}), { + defined: ["interactive", "html"], + ids: true, + index: false +}) function escapeChar(ch) { return ch == "<" ? "<" : ch == ">" ? ">" : ch == "&" ? "&" : """ } function escape(str) { return str.replace(/[<>&"]/g, escapeChar) } -function hashContent(token, firstLast) { - let text = "" - if (token.children) { - for (let i = 0; i < token.children.length; i++) - if (token.children[i].type == "text") text += token.children[i].content - } else { - text = token.content - } - if (firstLast) text = startAndEnd(text) - - let sum = require("crypto").createHash("sha1") - sum.update(text) - return sum.digest("base64").slice(0, 10) -} - -function startAndEnd(text) { - var words = text.split(/\W+/); - if (!words[0]) words.shift(); - if (!words[words.length - 1]) words.pop(); - if (words.length <= 6) return words.join(" "); - return words.slice(0, 3).join(" ") + " " + words.slice(words.length - 3).join(" "); -} - function highlight(lang, text) { if (lang == "html") lang = "text/html" let result = "" @@ -45,6 +27,14 @@ function highlight(lang, text) { return result } +function anchor(token) { + return token.hashID ? `` : "" +} + +function attrs(token) { + return token.attrs ? token.attrs.map(([name, val]) => ` ${name}="${escape(val)}"`).join("") : "" +} + let renderer = { code_inline(token) { return `${escape(token.content)}` }, @@ -54,7 +44,7 @@ let renderer = { else sandbox = word.slice(8) return "" }) || "javascript" - return `\n\n
${highlight(lang, token.content)}
` + return `\n\n${anchor(token)}${highlight(lang, token.content)}` }, hardbreak() { return "
" }, @@ -63,11 +53,11 @@ let renderer = { text(token) { return escape(token.content) }, - paragraph_open(token, array, index) { return `\n\n

` }, + paragraph_open(token) { return `\n\n${anchor(token)}` }, paragraph_close() { return "

" }, - heading_open(token, array, index) { return `\n\n<${token.tag} id="h_${hashContent(array[index + 1])}">` }, + heading_open(token) { return `\n\n<${token.tag}${attrs(token)}>${anchor(token)}` }, heading_close(token) { return `` }, @@ -88,12 +78,18 @@ let renderer = { inline(token) { return renderArray(token.children) }, - meta() { return "" }, - meta_open() { return "" }, - meta_close() { return "" } -} + meta_figure(token) { + let {url, alt} = token.data + return `
${escape(alt)}
` + }, -function render(token) { + meta_quote_open() { return "\n\n
" }, + + meta_quote_close(token) { + let {author, title} = token.data + return (author ? `\n\n
${escape(author)}${title ? `, ${escape(title)}` : ""}` : "") + + "\n\n
" + } } function renderArray(tokens) { @@ -101,25 +97,13 @@ function renderArray(tokens) { for (let i = 0; i < tokens.length; i++) { let token = tokens[i], f = renderer[token.type] if (!f) throw new Error("No render function for " + token.type) - result += f(token, tokens, i) + result += f(token) } return result } -let args = {} -for (let i = 0; i < tokens.length; i++) { - let tok = tokens[i] - if (tok.type == "meta" && tok.attrGet("data")._ == "meta") { - let data = tok.attrGet("data") - for (let prop in data) args[prop] = data[prop] - } else if (tok.tag == "h1") { - if (tokens[i + 2].tag != "h1") throw new Error("Complex H1 not supported") - args.title = tokens[i + 1].children[0].content - tokens.splice(i--, 3) - } -} -args.content = renderArray(tokens) +metadata.content = renderArray(tokens) let template = mold.bake("chapter", fs.readFileSync(__dirname + "/chapter.html", "utf8")) -console.log(template(args)) +console.log(template(metadata)) diff --git a/src/transform.js b/src/transform.js new file mode 100644 index 000000000..fa3010108 --- /dev/null +++ b/src/transform.js @@ -0,0 +1,102 @@ +function childrenText(children) { + let text = "" + for (let i = 0; i < children.length; i++) + if (children[i].type == "text") text += children[i].content + return text +} + +function hash(text) { + let sum = require("crypto").createHash("sha1") + sum.update(text) + return sum.digest("base64").slice(0, 10) +} + +function startAndEnd(text) { + var words = text.split(/\W+/); + if (!words[0]) words.shift(); + if (!words[words.length - 1]) words.pop(); + if (words.length <= 6) return words.join(" "); + return words.slice(0, 3).join(" ") + " " + words.slice(words.length - 3).join(" "); +} + +function tokenText(token) { + if (token.type == "text") return token.content + else if (token.type == "softbreak") return " " +} + +function smartQuotes(tokens, i) { + let text = tokens[i].content, from = 0 + for (let j = i - 1, tt; j >= 0; j--) if (tt = tokenText(tokens[j])) { + text = tt + 
text + from = tt.length + break + } + let to = text.length + for (let j = i + 1, tt; j < tokens.length; j++) if (tt = tokenText(tokens[j])) { + text += tt + break + } + + return text + .replace(/([\w\.!?])'/g, "$1’") + .replace(/'(\w)/g, "‘$1") + .replace(/([\w\.!?])"/g, "$1”") + .replace(/"(\w)/g, "“$1") + .slice(from, to) +} + +function transformInline(tokens, options) { + let result = [] + for (let i = 0; i < tokens.length; i++) { + let tok = tokens[i], type = tok.type + if (options.index === false && type == "meta_index") { + // Drop + } else { + if (type == "text" && /[\'\"]/.test(tok.content)) tok.content = smartQuotes(tokens, i) + result.push(tok) + } + } + return result +} + +exports.transformTokens = function(tokens, options) { + let meta = {}, result = [] + for (let i = 0; i < tokens.length; i++) { + let tok = tokens[i], type = tok.type + if (type == "meta_meta") { + for (let prop in tok.data) if (prop != "_") meta[prop] = tok.data[prop] + } else if (type == "meta_id") { + for (let j = i + 1; j < tokens.length; j++) if (tokens[j].tag) { + ;(tokens[j].attrs || (tokens[j].attrs = [])).push(["id", tok.data.args[0]]) + break + } + } else if (type == "meta_if_open") { + let tag = tok.data.args[0] + if (options.defined.indexOf(tag) == -1) { + for (let j = i + 1; j < tokens.length; j++) if (tokens[j].type == "meta_if_close" && tokens[j].data.args[0] == tag) { + i = j + break + } + } + } else if (type == "meta_if_close" || type == "meta_startCode" || type == "meta_includeCode" || type == "meta_test" || + (options.index === false && (type == "meta_indexsee" || type == "meta_index"))) { + // Drop + } else if (tok.tag == "h1") { + if (tokens[i + 1].children.length != 1) throw new Error("Complex H1 not supported") + meta.title = tokens[i + 1].children[0].content + i += 2 + } else { + if (type == "paragraph_open") + tok.hashID = "p_" + hash(startAndEnd(childrenText(tokens[i + 1]))) + else if (type == "heading_open") + tok.hashID = "h_" + hash(childrenText(tokens[i + 
1])) + else if (type == "fence") + tok.hashID = "c_" + hash(tok.content) + + if (tok.children) tok.children = transformInline(tok.children, options) + + result.push(tok) + } + } + return {tokens: result, metadata: meta} +} From c7e4444ba3c75e844db92ddbdf340e29c00d04ab Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Thu, 28 Sep 2017 15:48:14 +0200 Subject: [PATCH 019/780] Simplify representation of meta tag args --- src/convert.js | 14 ++++++-------- src/markdown.js | 31 +++++++++++++------------------ src/render_html.js | 4 ++-- src/transform.js | 8 ++++---- 4 files changed, 25 insertions(+), 32 deletions(-) diff --git a/src/convert.js b/src/convert.js index b4dc77dc0..896d292f6 100644 --- a/src/convert.js +++ b/src/convert.js @@ -10,23 +10,21 @@ function processIndexTerm(term) { text = text .replace(/^(:\w+:\s*.+\n)+/, function(meta) { - let re = /(?:^|\n):(\w+):\s*(.+)/g, m - let tag = "{{meta" - while (m = re.exec(meta)) - tag += ", " + m[1] + ": " + m[2] - return tag + "}}\n" + let re = /(?:^|\n):(\w+):\s*(.+)/g, m, props = [] + while (m = re.exec(meta)) props.push(m[1] + ": " + m[2]) + return `{{meta {${props.join(", ")}}}}` }) .replace(/\n(=+) (.*?) =+\n/g, function(_, depth, title) { return "\n" + "#".repeat(depth.length) + " " + title + "\n" }) .replace(/\nimage::([^\]]+)\[(.*?)\]/g, function(_, url, meta) { - return "\n{{figure, url: " + JSON.stringify(url) + ", " + meta.replace(/="/g, ": \"") + "}}" + return "\n{{figure {url: " + JSON.stringify(url) + ", " + meta.replace(/="/g, ": \"") + "}}}" }) .replace(/\n(\[chapterquote=.*?\]\n)?\[quote, ([^\]]+)\]\n____\n([^]*?)____\n/g, function(_, chapter, author, content) { let match = /([^,]+), (.+)/.exec(author), title = null if (match) { title = match[2]; author = match[1] } - return "\n{{quote" + (chapter ? ", chapter: true" : "") + ", author: " + JSON.stringify(author) + - (title ? ", title: " + JSON.stringify(title) : "") + "\n\n" + content + "\n}}\n" + return "\n{{quote {" + (chapter ? 
"chapter: true, " : "") + "author: " + JSON.stringify(author) + + (title ? ", title: " + JSON.stringify(title) : "") + "}\n\n" + content + "\n}}\n" }) .replace(/\n\n+((?:(?!\n\n)[^])*?\(\(\((?:(?!\n\n)[^])*)/g, function(_, para) { let terms = [] diff --git a/src/markdown.js b/src/markdown.js index bc60a72eb..0d04f32a6 100644 --- a/src/markdown.js +++ b/src/markdown.js @@ -2,20 +2,15 @@ const PJSON = require("./pseudo_json") const markdownIt = require("markdown-it") function parseData(str) { - let tag = /^\s*(\w+)\s*(,\s*)?/.exec(str), obj + let tag = /^\s*(\w+)\s*?/.exec(str), args if (!tag) return null if (tag[0].length == str.length) { - obj = {} - } else if (tag[2]) { - try { obj = PJSON.parse("{" + str.slice(tag[0].length) + "}") } - catch(_) { return null } + args = [] } else { - obj = {} - try { obj.args = PJSON.parse("[" + str.slice(tag[0].length) + "]") } + try { args = PJSON.parse("[" + str.slice(tag[0].length) + "]") } catch(_) { return null } } - obj._ = tag[1] - return obj + return {tag: tag[1], args} } function parseBlockMeta(state, startLine, endLine) { @@ -29,12 +24,12 @@ function parseBlockMeta(state, startLine, endLine) { let content = state.src.slice(pos + 2, max), single - if (single = /\}\}\s*/.exec(content)) { + if (single = /\}\}\s*$/.exec(content)) { let data = parseData(content.slice(0, single.index)) if (!data) return false - let token = state.push("meta_" + data._, null, 0) + let token = state.push("meta_" + data.tag, null, 0) token.map = [startLine, startLine + 1] - token.data = data + token.args = data.args state.line++ return true } @@ -54,11 +49,11 @@ function parseBlockMeta(state, startLine, endLine) { } } - let token = state.push("meta_" + data._ + "_open", null, 1) + let token = state.push("meta_" + data.tag + "_open", null, 1) token.map = [startLine, line + 1] - token.data = data + token.args = data.args state.md.block.tokenize(state, startLine + 1, line) - state.push("meta_" + data._ + "_close", null, -1).data = data + 
state.push("meta_" + data.tag + "_close", null, -1).args = data.args state.line = line + 1 return true @@ -91,9 +86,9 @@ function parseInlineMeta(state, silent) { state.pos++ state.posMax = end - if (!silent) state.push("meta_" + data._ + "_open", null, 1).data = data + if (!silent) state.push("meta_" + data.tag + "_open", null, 1).args = data.args state.md.inline.tokenize(state) - if (!silent) state.push("meta_" + data._ + "_close", null, -1).data = data + if (!silent) state.push("meta_" + data.tag + "_close", null, -1).args = data.args state.pos = metaEnd + 1 state.posMax = max @@ -113,7 +108,7 @@ function parseIndexTerm(state, silent) { let term = state.src.slice(start, end) - if (!silent) state.push("meta_index", null, 0).data = {_: "index", args: [term]} + if (!silent) state.push("meta_index", null, 0).args = [term] state.pending += term state.pos = end + 2 return true diff --git a/src/render_html.js b/src/render_html.js index 6ae486618..e6a094701 100644 --- a/src/render_html.js +++ b/src/render_html.js @@ -79,14 +79,14 @@ let renderer = { inline(token) { return renderArray(token.children) }, meta_figure(token) { - let {url, alt} = token.data + let {url, alt} = token.args[0] return `
${escape(alt)}
` }, meta_quote_open() { return "\n\n
" }, meta_quote_close(token) { - let {author, title} = token.data + let {author, title} = token.args[0] return (author ? `\n\n
${escape(author)}${title ? `, ${escape(title)}` : ""}` : "") + "\n\n
" } diff --git a/src/transform.js b/src/transform.js index fa3010108..b69672edd 100644 --- a/src/transform.js +++ b/src/transform.js @@ -64,16 +64,16 @@ exports.transformTokens = function(tokens, options) { for (let i = 0; i < tokens.length; i++) { let tok = tokens[i], type = tok.type if (type == "meta_meta") { - for (let prop in tok.data) if (prop != "_") meta[prop] = tok.data[prop] + for (let prop in tok.args[0]) if (prop != "_") meta[prop] = tok.args[0][prop] } else if (type == "meta_id") { for (let j = i + 1; j < tokens.length; j++) if (tokens[j].tag) { - ;(tokens[j].attrs || (tokens[j].attrs = [])).push(["id", tok.data.args[0]]) + ;(tokens[j].attrs || (tokens[j].attrs = [])).push(["id", tok.args[0]]) break } } else if (type == "meta_if_open") { - let tag = tok.data.args[0] + let tag = tok.args[0] if (options.defined.indexOf(tag) == -1) { - for (let j = i + 1; j < tokens.length; j++) if (tokens[j].type == "meta_if_close" && tokens[j].data.args[0] == tag) { + for (let j = i + 1; j < tokens.length; j++) if (tokens[j].type == "meta_if_close" && tokens[j].args[0] == tag) { i = j break } From dcd1c093ca6d9353d5c0bce6770e379bd65268df Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Thu, 28 Sep 2017 16:50:12 +0200 Subject: [PATCH 020/780] Further refine markdown tools --- src/chapter.html | 2 +- src/convert.js | 42 +++++++++++++++++++++++++++--------------- src/markdown.js | 9 +++++---- src/render_html.js | 22 +++++++++++++++++++--- src/transform.js | 2 +- 5 files changed, 53 insertions(+), 24 deletions(-) diff --git a/src/chapter.html b/src/chapter.html index 31443cb80..0d5728826 100644 --- a/src/chapter.html +++ b/src/chapter.html @@ -10,7 +10,7 @@ <> <> diff --git a/src/convert.js b/src/convert.js index 896d292f6..4eb783c83 100644 --- a/src/convert.js +++ b/src/convert.js @@ -8,31 +8,40 @@ function processIndexTerm(term) { return terms.length == 1 ? terms[0] : "[" + terms.join(", ") + "]" } +function maybeQuote(value) { + return /\W/.test(value) ? 
JSON.stringify(value) : value +} + text = text .replace(/^(:\w+:\s*.+\n)+/, function(meta) { let re = /(?:^|\n):(\w+):\s*(.+)/g, m, props = [] - while (m = re.exec(meta)) props.push(m[1] + ": " + m[2]) - return `{{meta {${props.join(", ")}}}}` + while (m = re.exec(meta)) props.push(m[1] + ": " + (m[1] == "load_files" ? m[2] : maybeQuote(m[2]))) + return `{{meta {${props.join(", ")}}}}\n` }) .replace(/\n(=+) (.*?) =+\n/g, function(_, depth, title) { return "\n" + "#".repeat(depth.length) + " " + title + "\n" }) - .replace(/\nimage::([^\]]+)\[(.*?)\]/g, function(_, url, meta) { - return "\n{{figure {url: " + JSON.stringify(url) + ", " + meta.replace(/="/g, ": \"") + "}}}" + .replace(/\nimage::([^\[]+)\[(.*?)\]\n/g, function(_, url, meta) { + return "\n{{figure {url: " + JSON.stringify(url) + ", " + meta.replace(/="/g, ": \"") + "}}}\n" }) - .replace(/\n(\[chapterquote=.*?\]\n)?\[quote, ([^\]]+)\]\n____\n([^]*?)____\n/g, function(_, chapter, author, content) { - let match = /([^,]+), (.+)/.exec(author), title = null - if (match) { title = match[2]; author = match[1] } - return "\n{{quote {" + (chapter ? "chapter: true, " : "") + "author: " + JSON.stringify(author) + - (title ? ", title: " + JSON.stringify(title) : "") + "}\n\n" + content + "\n}}\n" + .replace(/\n(\[chapterquote=.*?\]\n)?(?:\[quote,\s*([^\]]+)\]\n)?___+\n([^]*?)___+\n/g, function(_, chapter, author, content) { + let props = [] + if (author) { + let match = /([^,]+), (.+)/.exec(author), title = null + if (match) props.push(`author: ${JSON.stringify(match[1])}`, `title: ${JSON.stringify(match[2])}`) + else props.push(`author: ${JSON.stringify(author)}`) + } + if (chapter) props.push("chapter: true") + return `\n{{quote${props.length ? 
" {" + props.join(", ") + "}" : ""}\n\n${content}\nquote}}\n` }) - .replace(/\n\n+((?:(?!\n\n)[^])*?\(\(\((?:(?!\n\n)[^])*)/g, function(_, para) { + .replace(/\n\n+((?:(?!\n\n)[^])*?\(\(\((?:(?!\n\n)[^])*)/g, function(all, para) { let terms = [] para = para.replace(/\(\(\(((?:\([^\)]*\)|[^])*?)\)\)\)/g, function(_, content) { terms.push(content) return "" }).replace(/^\s*/, "") - return "\n\n{{index " + terms.map(processIndexTerm).join(", ") + "}}\n\n" + para + if (terms.length) return "\n\n{{index " + terms.map(processIndexTerm).join(", ") + "}}\n\n" + para + else return all }) .replace(/\bindexsee:\[(.*?),\s*(.*?)\]\s*/g, function(_, term, ref) { return "{{indexsee " + processIndexTerm(term) + ", " + processIndexTerm(ref) + "}}\n\n" @@ -44,8 +53,8 @@ text = text if (sandbox) params.push("sandbox-" + sandbox) return "\n```" + params.join(" ") + "\n" + content + "\n```\n" }) - .replace(/\n\/\/ (?:(start_code)|test: (.*)|include_code (.*))/g, function(_, startCode, test, includeCode) { - if (startCode) return "\n{{startCode}}" + .replace(/\n\/\/ (?:(start_code(?: (.*))?)|test: (.*)|include_code (.*))/g, function(_, startCode, startCodeParam, test, includeCode) { + if (startCode) return `\n{{startCode${startCodeParam ? " " + JSON.stringify(startCodeParam) : ""}}}` if (test) return "\n{{test " + test + "}}" return "\n{{includeCode " + JSON.stringify(includeCode) + "}}" }) @@ -53,7 +62,7 @@ text = text return "[" + content + "](" + url + ")" }) .replace(/\nifdef::(\w+?)_target\[\]\n([^]*?)\nendif::.*/g, function(_, type, content) { - return "\n{{if " + type + "\n" + content + "\n}}" + return "\n{{if " + type + "\n" + content + "\nif}}" }) .replace(/\+\+(?! |\))((?:(?!\n\n)[^])+)\+\+/g, function(_, text) { return "_" + text + "_" @@ -62,7 +71,10 @@ text = text return "_" + text + "_" }) .replace(/\n\[\[(.*?)\]\]\n/g, function(_, name) { - return `\n{{id ${/\W/.test(name) ? 
JSON.stringify(name) : name}}}\n` + return `\n{{id ${maybeQuote(name)}}}\n` + }) + .replace(/\n!!hint!!\n([^]+?)\n!!hint!!/g, function(_, content) { + return `\n{{hint\n${content}\nhint}}` }) .replace(/\[sic]/, "\\[sic]") diff --git a/src/markdown.js b/src/markdown.js index 0d04f32a6..6948ee7ba 100644 --- a/src/markdown.js +++ b/src/markdown.js @@ -37,13 +37,14 @@ function parseBlockMeta(state, startLine, endLine) { let data = parseData(content) if (!data) return false - let line = startLine + 1, depth = 0 + let line = startLine + 1, close = data.tag + "}}", open = new RegExp("^\\{\\{" + data.tag + "(\\s|$)"), depth = 0 for (; line < endLine; line++) { if (line == endLine) throw new SyntaxError("Unterminated meta block") let start = state.bMarks[line] + state.tShift[line] - let after = state.src.slice(start, start + 2) - if (after == "{{" && !/\}\}\s*$/.test(state.src.slice(start, state.eMarks[line]))) depth++ - else if (after == "}}") { + let lineText = state.src.slice(start, state.eMarks[line]) + if (open.test(lineText)) { + depth++ + } else if (lineText == close) { if (depth) depth-- else break } diff --git a/src/render_html.js b/src/render_html.js index e6a094701..b124bb6b2 100644 --- a/src/render_html.js +++ b/src/render_html.js @@ -32,7 +32,7 @@ function anchor(token) { } function attrs(token) { - return token.attrs ? token.attrs.map(([name, val]) => ` ${name}="${escape(val)}"`).join("") : "" + return token.attrs ? token.attrs.map(([name, val]) => ` ${name}="${escape(String(val))}"`).join("") : "" } let renderer = { @@ -61,6 +61,18 @@ let renderer = { heading_close(token) { return `` }, + bullet_list_open(token) { return `\n\n` }, + + bullet_list_close() { return `` }, + + ordered_list_open(token) { return `\n\n` }, + + ordered_list_close() { return `\n\n` }, + + list_item_open() { return "\n\n
  • " }, + + list_item_close() { return "
  • " }, + strong_open() { return "" }, strong_close() { return "" }, @@ -86,10 +98,14 @@ let renderer = { meta_quote_open() { return "\n\n
    " }, meta_quote_close(token) { - let {author, title} = token.args[0] + let {author, title} = token.args[0] || {} return (author ? `\n\n
    ${escape(author)}${title ? `, ${escape(title)}` : ""}` : "") + "\n\n
    " - } + }, + + meta_hint_open() { return "\n\n
    " }, + + meta_hint_close() { return "\n\n
    " } } function renderArray(tokens) { diff --git a/src/transform.js b/src/transform.js index b69672edd..7254bc590 100644 --- a/src/transform.js +++ b/src/transform.js @@ -64,7 +64,7 @@ exports.transformTokens = function(tokens, options) { for (let i = 0; i < tokens.length; i++) { let tok = tokens[i], type = tok.type if (type == "meta_meta") { - for (let prop in tok.args[0]) if (prop != "_") meta[prop] = tok.args[0][prop] + for (let prop in tok.args[0]) meta[prop] = tok.args[0][prop] } else if (type == "meta_id") { for (let j = i + 1; j < tokens.length; j++) if (tokens[j].tag) { ;(tokens[j].attrs || (tokens[j].attrs = [])).push(["id", tok.args[0]]) From 038d3aaf7d0e89edb0ba0f6df1a7bbf4a998a5dc Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Thu, 28 Sep 2017 16:52:39 +0200 Subject: [PATCH 021/780] Add converted markdown files --- 00_intro.md | 554 +++++++++++++ 01_values.md | 684 ++++++++++++++++ 02_program_structure.md | 1125 +++++++++++++++++++++++++++ 03_functions.md | 1140 +++++++++++++++++++++++++++ 04_data.md | 1585 +++++++++++++++++++++++++++++++++++++ 05_higher_order.md | 1194 ++++++++++++++++++++++++++++ 05_higher_order.txt | 1 + 06_object.md | 1342 ++++++++++++++++++++++++++++++++ 07_elife.md | 1330 +++++++++++++++++++++++++++++++ 08_error.md | 952 +++++++++++++++++++++++ 09_regexp.md | 1443 ++++++++++++++++++++++++++++++++++ 10_modules.md | 1041 +++++++++++++++++++++++++ 11_language.md | 968 +++++++++++++++++++++++ 12_browser.md | 455 +++++++++++ 13_dom.md | 1289 ++++++++++++++++++++++++++++++ 14_event.md | 1238 +++++++++++++++++++++++++++++ 15_game.md | 1514 ++++++++++++++++++++++++++++++++++++ 16_canvas.md | 1635 +++++++++++++++++++++++++++++++++++++++ 17_http.md | 1088 ++++++++++++++++++++++++++ 18_forms.md | 1062 +++++++++++++++++++++++++ 19_paint.md | 1061 +++++++++++++++++++++++++ 20_node.md | 1437 ++++++++++++++++++++++++++++++++++ 21_skillsharing.md | 1471 +++++++++++++++++++++++++++++++++++ 23 files changed, 25609 insertions(+) create 
mode 100644 00_intro.md create mode 100644 01_values.md create mode 100644 02_program_structure.md create mode 100644 03_functions.md create mode 100644 04_data.md create mode 100644 05_higher_order.md create mode 100644 06_object.md create mode 100644 07_elife.md create mode 100644 08_error.md create mode 100644 09_regexp.md create mode 100644 10_modules.md create mode 100644 11_language.md create mode 100644 12_browser.md create mode 100644 13_dom.md create mode 100644 14_event.md create mode 100644 15_game.md create mode 100644 16_canvas.md create mode 100644 17_http.md create mode 100644 18_forms.md create mode 100644 19_paint.md create mode 100644 20_node.md create mode 100644 21_skillsharing.md diff --git a/00_intro.md b/00_intro.md new file mode 100644 index 000000000..5c8c3bd61 --- /dev/null +++ b/00_intro.md @@ -0,0 +1,554 @@ +{{meta {next_link: 01_values, load_files: ["code/intro.js"]}}} + +# Introduction + +This is a book about getting ((computer))s to do what you want them to +do. Computers are about as common as screwdrivers today, but they contain a +lot more hidden complexity and thus are harder to operate and +understand. To many, they remain alien, slightly threatening things. + +{{figure {url: "img/generated/computer.png", alt: "Communicating with a computer"}}} + +{{index "graphical user interface"}} + +We've found two effective ways of +bridging the communication gap between us, squishy biological +organisms with a talent for social and spatial reasoning, and +computers, unfeeling manipulators of meaningless data. The first is to +appeal to our sense of the physical world and build interfaces that +mimic that world and allow us to manipulate shapes on a screen with +our fingers. This works very well for casual machine interaction. + +{{index "programming language"}} + +But we have not yet found a good way to use +the point-and-click approach to communicate things to the computer +that the designer of the interface did not anticipate. 
For open-ended +interfaces, such as instructing the computer to perform arbitrary +tasks, we've had more luck with an approach that makes use of our +talent for language: teaching the machine a language. + +{{index "human language", expressivity}} + +Human languages allow words and +phrases to be combined in many ways, which allows us to say +many different things. Computer languages, though typically less +grammatically flexible, follow a similar principle. + +{{index [JavaScript, "availability of"], "casual computing"}} + +Casual computing +has become much more widespread in the past 20 years, and +language-based interfaces, which once were the default way in which +people interacted with computers, have largely been replaced with +graphical interfaces. But they are still there, if you know where to +look. One such language, JavaScript, is built into almost every +web ((browser)) and is thus available on just about every consumer +device. + +{{indexsee "web browser", browser}} + +This book intends to make you familiar +enough with this language to be able to make a computer do what you +want. + +## On programming + +{{quote {author: "Confucius"} + +{{index Confucius}} + +I do not enlighten those who are not eager to learn, +nor arouse those who are not anxious to give an explanation +themselves. If I have presented one corner of the square and they +cannot come back to me with the other three, I should not go over the +points again. + +quote}} + +{{index [programming, "difficulty of"]}} + +Besides explaining JavaScript, I also +will introduce the basic principles of programming. Programming, it +turns out, is hard. The fundamental rules are typically simple and +clear. But programs built on top of these rules tend to become complex +enough to introduce their own rules and complexity. You're building +your own maze, in a way, and you might just get lost in it. + +{{index learning}} + +There will be times when reading this book feels terribly +frustrating. 
If you are new to programming, there will be a lot of new +material to digest. Much of this material will then be _combined_ in +ways that require you to make additional connections. + +It is up to you to make the necessary effort. When you are struggling +to follow the book, do not jump to any conclusions about your own +capabilities. You are fine—you just need to keep at it. Take a break, +reread some material, and _always_ make sure you read and understand +the example programs and ((exercises)). Learning is hard work, but +everything you learn is yours and will make subsequent learning +easier. + +{{quote {author: "Joseph Weizenbaum", title: "Computer Power and Human Reason"} + +{{index "Weizenbaum, Joseph"}} + +The computer programmer is a creator of +universes for which he \[sic] alone is responsible. Universes of virtually +unlimited complexity can be created in the form of computer programs. + +quote}} + +{{index [program, "nature of"], data}} + +A program is many things. It is a +piece of text typed by a programmer, it is the directing force that +makes the computer do what it does, it is data in the computer's +memory, yet it controls the actions performed on this same memory. +Analogies that try to compare programs to objects we are familiar with +tend to fall short. A superficially fitting one is that of a +machine—lots of separate parts tend to be involved, and to make the +whole thing tick, we have to consider the ways in which these parts +interconnect and contribute to the operation of the whole. + +{{index computer}} + +A computer is a machine built to act as a host for these +immaterial machines. Computers themselves can do only stupidly +straightforward things. The reason they are so useful is that they do +these things at an incredibly high speed. A program can ingeniously +combine an enormous number of these simple actions in order to do very +complicated things. 
+ +{{index [programming, "joy of"]}} + +To some of us, writing computer programs is a +fascinating game. A program is a building of thought. It is costless +to build, it is weightless, and it grows easily under our typing +hands. + +But without care, a program's size and ((complexity)) will grow out of +control, confusing even the person who created it. Keeping programs +under control is the main problem of programming. When a program +works, it is beautiful. The art of programming is the skill of +controlling complexity. The great program is subdued—made simple in +its complexity. + +{{index "programming style", "best practices"}} + +Many programmers believe +that this complexity is best managed by using only a small set of +well-understood techniques in their programs. They have composed +strict rules (“best practices”) prescribing the form programs should +have, and the more zealous among them will consider those who go +outside of this safe little zone to be _bad_ programmers. + +{{index experiment, learning}} + +What hostility to the richness of +programming—to try to reduce it to something straightforward and +predictable, to place a taboo on all the weird and beautiful programs! +The landscape of programming techniques is enormous, fascinating in +its diversity, and still largely unexplored. It is certainly dangerous +going, luring the inexperienced programmer into all kinds of +confusion, but that only means you should proceed with caution and +keep your wits about you. As you learn there will always be new +challenges and new territory to explore. Programmers who refuse to +keep exploring will stagnate, forget their joy, and get bored with +their craft. + +## Why language matters + +{{index "programming language", "machine code", "binary data"}} + +In the +beginning, at the birth of computing, there were no programming +languages. 
Programs looked something like this: + +```null +00110001 00000000 00000000 +00110001 00000001 00000001 +00110011 00000001 00000010 +01010001 00001011 00000010 +00100010 00000010 00001000 +01000011 00000001 00000000 +01000001 00000001 00000001 +00010000 00000010 00000000 +01100010 00000000 00000000 +``` + +{{index [programming, "history of"], "punch card", complexity}} + +That is a +program to add the numbers from 1 to 10 together and print out the +result: `1 + 2 + ... + 10 = 55`. It could run on a simple, +hypothetical machine. To program early computers, it was necessary to +set large arrays of switches in the right position or punch holes in +strips of cardboard and feed them to the computer. You can probably imagine +how tedious and error-prone this procedure was. Even writing simple +programs required much cleverness and discipline. Complex ones were +nearly inconceivable. + +{{index bit, "wizard (mighty)"}} + +Of course, manually entering these +arcane patterns of bits (the ones and zeros) did give the programmer +a profound sense of being a mighty wizard. And that has to be worth +something in terms of job satisfaction. + +{{index memory, instruction}} + +Each line of the previous program contains a +single instruction. It could be written in English like this: + +```text/plain +1. Store the number 0 in memory location 0. +2. Store the number 1 in memory location 1. +3. Store the value of memory location 1 in memory location 2. +4. Subtract the number 11 from the value in memory location 2. +5. If the value in memory location 2 is the number 0, + continue with instruction 9. +6. Add the value of memory location 1 to memory location 0. +7. Add the number 1 to the value of memory location 1. +8. Continue with instruction 3. +9. Output the value of memory location 0. +``` + +{{index readability, naming, variable}} + +Although that is already +more readable than the soup of bits, it is still rather unpleasant. 
It +might help to use names instead of numbers for the instructions and +memory locations. + +```text/plain + Set “total” to 0. + Set “count” to 1. +[loop] + Set “compare” to “count”. + Subtract 11 from “compare”. + If “compare” is zero, continue at [end]. + Add “count” to “total”. + Add 1 to “count”. + Continue at [loop]. +[end] + Output “total”. +``` + +{{index loop, jump, "summing example"}} + +Can you see how the program +works at this point? The first two lines give +two memory locations their starting values: `total` will be used to +build up the result of the computation, and `count` will keep track of the +number that we are currently looking at. The lines using `compare` are +probably the weirdest ones. The program wants to see +whether `count` is equal to 11 in order to decide whether it can stop +running. Because our hypothetical machine is rather primitive, it can only +test whether a number is zero and make a decision (or jump) based on +that. So it uses the memory location labeled `compare` to compute the +value of `count - 11` and makes a decision based on that value. The +next two lines add the value of `count` to the result and increment +`count` by 1 every time the program has decided that `count` is not 11 yet. + +Here is the same program in JavaScript: + +``` +var total = 0, count = 1; +while (count <= 10) { + total += count; + count += 1; +} +console.log(total); +// → 55 +``` + +{{index "while loop", loop}} + +This version gives us a few more improvements. +Most importantly, there is no need to specify the way we want the +program to jump back and forth anymore. The `while` language +construct takes care of that. It continues executing the block +(wrapped in braces) below it as long as the condition it was given +holds. That condition is `count <= 10`, which means “_count_ is less than or equal to +10”. We no longer have to create a temporary value and compare that +to zero, which was an uninteresting detail. 
Part of the power of +programming languages is that they take care of uninteresting details +for us. + +{{index "console.log"}} + +At the end of the program, after the `while` construct has +finished, the `console.log` operation is applied to the result in +order to write it as output. + +{{index "sum function", "range function", abstraction, function}} + +Finally, here is what the +program could look like if we happened to have the convenient +operations `range` and `sum` available, which respectively create a +((collection)) of numbers within a range and compute the sum of a +collection of numbers: + +{{startCode}} + +``` +console.log(sum(range(1, 10))); +// → 55 +``` + +{{index readability}} + +The moral of this story is that the same program can +be expressed in long and short, unreadable and readable ways. The +first version of the program was extremely obscure, whereas this last +one is almost English: `log` the `sum` of the `range` of numbers from +1 to 10. (We will see in [later chapters](04_data.html#data) how to +build operations like `sum` and `range`.) + +{{index ["programming language", "power of"], composability}} + +A good +programming language helps the programmer by allowing them to talk +about the actions that the computer has to perform on a higher level. +It helps omit uninteresting details, provides convenient building +blocks (such as `while` and `console.log`), allows you to define your +own building blocks (such as `sum` and `range`), and makes those blocks +easy to compose. + +## What is JavaScript? + +{{index history, Netscape, browser, "web application", JavaScript, [JavaScript, "history of"], "World Wide Web"}} + +{{indexsee WWW, "World Wide Web"}} + +{{indexsee Web, "World Wide Web"}} + +JavaScript was introduced in 1995 as a way to add programs to +web pages in the Netscape Navigator browser. The language has since +been adopted by all other major graphical web browsers. 
It has made modern +web applications possible—applications with which you can interact +directly, without doing a page reload for every action. But it is also used in more +traditional websites to provide various forms of interactivity and +cleverness. + +{{index Java, naming}} + +It is important to note that JavaScript has +almost nothing to do with the programming language named Java. The +similar name was inspired by marketing considerations, rather than +good judgment. When JavaScript was being introduced, the Java language +was being heavily marketed and was gaining popularity. Someone +thought it was a good idea to try to ride along on this success. Now we +are stuck with the name. + +{{index ECMAScript, compatibility}} + +After its adoption outside of +Netscape, a ((standard)) document was written to describe the way the +JavaScript language should work to make sure the various pieces of +software that claimed to support JavaScript were actually talking +about the same language. This is called the ECMAScript standard, after +the Ecma International organization that did the standardization. In +practice, the terms ECMAScript and JavaScript can be used interchangeably—they +are two names for the same language. + +{{index [JavaScript, "weaknesses of"], debugging}} + +There are those who will +say _terrible_ things about the JavaScript language. Many of these +things are true. When I was required to write something in JavaScript +for the first time, I quickly came to despise it. It would accept +almost anything I typed but interpret it in a way that was completely +different from what I meant. This had a lot to do with the fact that I +did not have a clue what I was doing, of course, but there is a real +issue here: JavaScript is ridiculously liberal in what it allows. The +idea behind this design was that it would make programming in +JavaScript easier for beginners. 
In actuality, it mostly makes finding +problems in your programs harder because the system will not point +them out to you. + +{{index [JavaScript, "flexibility of"], flexibility}} + +This flexibility also +has its advantages, though. It leaves space for a lot of techniques +that are impossible in more rigid languages, and as you will see (for +example in [Chapter 10](10_modules.html#modules)) it +can be used to overcome some of JavaScript's shortcomings. After +((learning)) the language properly and working with it for a while, I have +learned to actually _like_ JavaScript. + +{{index future, [JavaScript, "versions of"], ECMAScript, "ECMAScript 6"}} + +There have been several versions of JavaScript. ECMAScript +version 3 was the widely supported version in the time of +JavaScript's ascent to dominance, roughly between 2000 and 2010. +During this time, work was underway on an ambitious version 4, which +planned a number of radical improvements and extensions to the +language. Changing a living, widely used language in such a radical +way turned out to be politically difficult, and work on the version 4 +was abandoned in 2008, leading to the much less ambitious version 5 +coming out in 2009. We're now at the point where all major +browsers support version 5, which is the language version that +this book will be focusing on. A version 6 is in the process of +being finalized, and some browsers are starting to support new +features from this version. + +{{index [JavaScript, "uses of"]}} + +Web browsers are not the only platforms on +which JavaScript is used. Some databases, such as MongoDB and CouchDB, +use JavaScript as their scripting and query language. Several +platforms for desktop and server programming, most notably the +((Node.js)) project (the subject of [Chapter +20](20_node.html#node)) are providing a powerful environment for programming JavaScript +outside of the browser.
+ +## Code, and what to do with it + +{{index "reading code", "writing code"}} + +Code is the text that makes up +programs. Most chapters in this book contain quite a lot of it. In my +experience, reading code and writing ((code)) are indispensable parts of +((learning)) to program, so try to not just glance over the examples. Read +them attentively and understand them. This may be slow and confusing +at first, but I promise that you will quickly get the hang of it. The +same goes for the ((exercises)). Don't assume you understand them +until you've actually written a working solution. + +{{index interpretation}} + +I recommend you try your solutions to exercises +in an actual JavaScript interpreter. That way, you'll get immediate feedback on +whether what you are doing is working, and, I hope, you'll be +tempted to ((experiment)) and go beyond the exercises. + +{{if interactive + +When reading this book in your browser, you can edit (and run) all +example programs by clicking them. + +if}} + +{{if book + +{{index download, sandbox, "running code"}} + +The easiest way to run +the example code in the book, and to experiment with it, is to look it +up in the online version of the book at +[_eloquentjavascript.net_](http://eloquentjavascript.net/). There, you +can click any code example to edit and run it and to see the +output it produces. To work on the exercises, go to +[_eloquentjavascript.net/code_](http://eloquentjavascript.net/code), +which provides starting code for each coding exercise and allows you +to look at the solutions. + +if}} + +{{index "developer tools", "JavaScript console"}} + +If you want to run the +programs defined in this book outside of the book's sandbox, some care +is required. Many examples stand on their own and should work in any +JavaScript environment. But code in later chapters is mostly written +for a specific environment (the browser or Node.js) and can run only +there.
In addition, many chapters define bigger programs, and the +pieces of code that appear in them depend on each other or on external +files. The [sandbox](http://eloquentjavascript.net/code) on the website +provides links to Zip files containing all of the scripts and data +files necessary to run the code for a given chapter. + +## Overview of this book + +This book contains roughly three parts. The first 11 chapters discuss +the JavaScript language itself. The next eight chapters are about web +((browsers)) and the way JavaScript is used to program them. Finally, +two chapters are devoted to ((Node.js)), another environment to program +JavaScript in. + +Throughout the book, there are five _project chapters_, which describe +larger example programs to give you a taste of real programming. In +order of appearance, we will work through building an +[artificial life simulation](07_elife.html#elife), a +[programming language](11_language.html#language), a +[platform game](15_game.html#game), a +[paint program](19_paint.html#paint), and a +[dynamic website](21_skillsharing.html#skillsharing). + +The language part of the book starts with four chapters to introduce +the basic structure of the JavaScript language. They introduce +[control structures](02_program_structure.html#program_structure) +(such as the `while` word you saw in this introduction), +[functions](03_functions.html#functions) (writing your own +operations), and [data structures](04_data.html#data). After these, +you will be able to write simple programs. Next, Chapters +[5](05_higher_order.html#higher_order) and +[6](06_object.html#object) introduce techniques to use functions +and objects to write more _abstract_ code and thus keep complexity +under control.
+ +After a [first project chapter](07_elife.html#elife), the first +part of the book continues with chapters on +[error handling and fixing](08_error.html#error), on +[regular expressions](09_regexp.html#regexp) (an important tool for +working with text data), and on +[modularity](10_modules.html#modules)—another weapon against +complexity. The [second project chapter](11_language.html#language) +concludes the first part of the book. + +The second part, Chapters [12](12_browser.html#browser) to +[19](19_paint.html#paint), describes the tools that browser +JavaScript has access to. You'll learn to display things on the screen +(Chapters [13](13_dom.html#dom) and +[16](16_canvas.html#canvas)), respond to user input (Chapters +[14](14_event.html#event) and [18](18_forms.html#forms)), and +communicate over the network ([Chapter 17](17_http.html#http)). +There are again two project chapters in this part. + +After that, [Chapter 20](20_node.html#node) describes Node.js, and +[Chapter 21](21_skillsharing.html#skillsharing) builds a simple web +system using that tool. + +{{if commercial + +Finally, [Chapter 22](22_fast.html#fast) describes some of the +considerations that come up when optimizing JavaScript programs for +speed. + +if}} + +## Typographic conventions + +{{index "factorial function"}} + +In this book, text written in a `monospaced` +font will represent elements of programs—sometimes +they are self-sufficient fragments, and sometimes they just refer to +part of a nearby program. Programs (of which you have already seen a +few), are written as follows: + +``` +function fac(n) { + if (n == 0) + return 1; + else + return fac(n - 1) * n; +} +``` + +{{index "console.log"}} + +Sometimes, in order to show the output that a program +produces, the expected output is written after it, with two slashes +and an arrow in front. + +``` +console.log(fac(8)); +// → 40320 +``` + +Good luck! 
+ diff --git a/01_values.md b/01_values.md new file mode 100644 index 000000000..b5293528c --- /dev/null +++ b/01_values.md @@ -0,0 +1,684 @@ +{{meta {chap_num: 1, prev_link: 00_intro, next_link: 02_program_structure, docid: values}}} + +# Values, Types, and Operators + +{{quote {author: "Master Yuan-Ma", title: "The Book of Programming", chapter: true} + +Below the surface of the +machine, the program moves. Without effort, it expands and contracts. +In great harmony, electrons scatter and regroup. The forms on the +monitor are but ripples on the water. The essence stays invisibly +below. + +quote}} + +{{index "Yuan-Ma", "Book of Programming", "binary data", data, bit, memory}} + +Inside the computer's world, +there is only data. You can read data, modify data, create new +data—but anything that isn't data simply does not exist. All this data +is stored as long sequences of bits and is thus fundamentally alike. + +{{index CD, signal}} + +Bits are any kind of two-valued things, usually +described as zeros and ones. Inside the computer, they take forms +such as a high or low electrical charge, a strong or weak signal, or a +shiny or dull spot on the surface of a CD. Any piece of discrete +information can be reduced to a sequence of zeros and ones and thus +represented in bits. + +{{index "binary number", radix, "decimal number"}} + +For example, think +about how you might show the number 13 in bits. It works the same way +you write decimal numbers, but instead of 10 different ((digit))s, you +have only 2, and the weight of each increases by a factor of 2 from +right to left. Here are the bits that make up the number 13, with the +weights of the digits shown below them: + +```null + 0 0 0 0 1 1 0 1 + 128 64 32 16 8 4 2 1 +``` + +So that's the binary number 00001101, or 8 + 4 + 1, which equals 13. + +## Values + +{{index memory, "volatile data storage", "hard drive"}} + +Imagine a sea of +bits. An ocean of them. 
A typical modern computer has more than 30 +billion bits in its volatile data storage. Nonvolatile storage (the +hard disk or equivalent) tends to have yet a few orders of magnitude +more. + +{{figure {url: "img/bit-sea.png", alt: "The Ocean of Bits"}}} + +To be able to work with such quantities of bits without getting lost, +you can separate them into chunks that represent pieces of +information. In a JavaScript environment, those chunks are called +_((value))s_. Though all values are made of bits, they play different +roles. Every value has a ((type)) that determines its role. There are +six basic types of values in JavaScript: numbers, strings, Booleans, +objects, functions, and undefined values. + +{{index "garbage collection"}} + +To create a value, you must merely invoke its +name. This is convenient. You don't have to gather building material +for your values or pay for them. You just call for one, and _woosh_, +you have it. They are not created from thin air, of course. Every +value has to be stored somewhere, and if you want to use a gigantic +amount of them at the same time, you might run out of bits. +Fortunately, this is a problem only if you need them all +simultaneously. As soon as you no longer use a value, it will +dissipate, leaving behind its bits to be recycled as building material +for the next generation of values. + +This chapter introduces the atomic elements of JavaScript programs, +that is, the simple value types and the operators that can act on such +values. + +## Numbers + +{{index syntax, number, [number, notation]}} + +Values of the +_number_ type are, unsurprisingly, numeric values. In a JavaScript +program, they are written as follows: + +``` +13 +``` + +{{index "binary number"}} + +Use that in a program, and it will cause the bit +pattern for the number 13 to come into existence inside the computer's +memory. 
+ +{{index [number, representation], bit}} + +JavaScript uses a fixed +number of bits, namely 64 of them, to store a single number value. +There are only so many patterns you can make with 64 bits, which means +that the amount of different numbers that can be represented is +limited. For _N_ decimal ((digit))s, the amount of numbers that can be +represented is 10^_N_^. Similarly, given 64 binary digits, you can +represent 2^64^ different numbers, which is about 18 quintillion (an +18 with 18 zeros after it). This is a lot. + +Computer memory used to be a lot smaller, and people tended to use +groups of 8 or 16 bits to represent their numbers. It was easy to +accidentally _((overflow))_ such small numbers—to end up with a number +that did not fit into the given amount of bits. Today, even personal +computers have plenty of memory, so you are free to use 64-bit chunks, +which means you need to worry about overflow only when dealing with +truly astronomical numbers. + +{{index sign, "floating-point number", "fractional number", "sign bit"}} + +Not +all whole numbers below 18 quintillion fit in a JavaScript number, +though. Those bits also store negative numbers, so one bit indicates +the sign of the number. A bigger issue is that nonwhole numbers must +also be represented. To do this, some of the bits are used to store +the position of the decimal point. The actual maximum whole number +that can be stored is more in the range of 9 quadrillion (15 zeros), +which is still pleasantly huge. + +{{index [number, notation]}} + +Fractional numbers are written by using a +dot. + +``` +9.81 +``` + +{{index exponent, "scientific notation", [number, notation]}} + +For +very big or very small numbers, you can also use scientific notation +by adding an “e” (for “exponent”), followed by the exponent of the +number: + +``` +2.998e8 +``` + +That is 2.998 × 10^8^ = 299,800,000. 
+ +{{index pi, [number, "precision of"], "floating-point number"}} + +Calculations with whole numbers (also called _((integer))s_) +smaller than the aforementioned 9 quadrillion are guaranteed to always +be precise. Unfortunately, calculations with fractional numbers are +generally not. Just as π (pi) cannot be precisely expressed by a +finite number of decimal digits, many numbers lose some precision when +only 64 bits are available to store them. This is a shame, but it +causes practical problems only in specific situations. The important +thing is to be aware of it and treat fractional digital numbers as +approximations, not as precise values. + +### Arithmetic + +{{index syntax, operator, "binary operator", arithmetic, addition, multiplication}} + +The main +thing to do with numbers is arithmetic. Arithmetic operations such as +addition or multiplication take two number values and produce a new +number from them. Here is what they look like in JavaScript: + +``` +100 + 4 * 11 +``` + +{{index [operator, application], asterisk, "plus character", "pass:[*] operator", "+ operator"}} + +The `+` and `*` +symbols are called _operators_. The first stands for addition, and the +second stands for multiplication. Putting an operator between two +values will apply it to those values and produce a new value. + +{{index grouping, parentheses, precedence}} + +Does the example mean +“add 4 and 100, and multiply the result by 11”, or is the +multiplication done before the adding? As you might have guessed, the +multiplication happens first. But as in mathematics, you can change +this by wrapping the addition in parentheses. + +``` +(100 + 4) * 11 +``` + +{{index "dash character", "slash character", division, subtraction, minus, "- operator", "/ operator"}} + +For subtraction, there is the `-` operator, +and division can be done with the `/` operator. 
+ +When operators appear together without parentheses, the order in which +they are applied is determined by the _((precedence))_ of the +operators. The example shows that multiplication comes before +addition. The `/` operator has the same precedence as `*`. Likewise +for `+` and `-`. When multiple operators with the same precedence +appear next to each other, as in `1 - 2 + 1`, they are applied left +to right: `(1 - 2) + 1`. + +These rules of precedence are not something you should worry about. +When in doubt, just add parentheses. + +{{index "modulo operator", division, "remainder operator", "% operator"}} + +There is one more arithmetic operator, which you might not +immediately recognize. The `%` symbol is used to represent the +_remainder_ operation. `X % Y` is the remainder of dividing `X` by +`Y`. For example, `314 % 100` produces `14`, and `144 % 12` gives `0`. +Remainder's precedence is the same as that of multiplication and +division. You'll often see this operator referred to as _modulo_, +though technically _remainder_ is more accurate. + +### Special numbers + +{{index [number, "special values"]}} + +There are three special values in +JavaScript that are considered numbers but don't behave like normal +numbers. + +{{index infinity}} + +The first two are `Infinity` and `-Infinity`, which +represent the positive and negative infinities. `Infinity - 1` is +still `Infinity`, and so on. Don't put too much trust in +infinity-based computation. It isn't mathematically solid, and it will +quickly lead to our next special number: `NaN`. + +{{index NaN, "not a number", "division by zero"}} + +`NaN` stands for “not +a number”, even though it is a value of the number type. You'll get +this result when you, for example, try to calculate `0 / 0` (zero +divided by zero), `Infinity - Infinity`, or any number of other +numeric operations that don't yield a precise, meaningful result. 
+ +## Strings + +{{index syntax, text, character, [string, notation], "single-quote character", "double-quote character", "quotation mark"}} + +The next +basic data type is the _((string))_. Strings are used to represent +text. They are written by enclosing their content in quotes. + +``` +"Patch my boat with chewing gum" +'Monkeys wave goodbye' +``` + +Both single and double quotes can be used to mark strings as long as +the quotes at the start and the end of the string match. + +{{index "line break", "newline character"}} + +Almost anything can be put +between quotes, and JavaScript will make a string value out of it. But +a few characters are more difficult. You can imagine how putting +quotes between quotes might be hard. _Newlines_ (the characters you +get when you press Enter) also can't be put between quotes. The string +has to stay on a single line. + +{{index [escaping, "in strings"], "backslash character"}} + +To make it possible to include +such characters in a string, the following notation is used: whenever +a backslash (`\`) is found inside quoted text, it indicates that the +character after it has a special meaning. This is called _escaping_ +the character. A quote that is preceded by a backslash will not end +the string but be part of it. When an `n` character occurs after a +backslash, it is interpreted as a newline. Similarly, a `t` after a +backslash means a ((tab character)). Take the following string: + +``` +"This is the first line\nAnd this is the second" +``` + +The actual text contained is this: + +```null +This is the first line +And this is the second +``` + +There are, of course, situations where you want a backslash in a +string to be just a backslash, not a special code. If two backslashes +follow each other, they will collapse together, and only one will be +left in the resulting string value. This is how the string “_A newline +character is written like "\n"._” can be expressed: + +``` +"A newline character is written like \"\\n\"." 
+``` + +{{index "+ operator", concatenation}} + +Strings cannot be divided, +multiplied, or subtracted, but the `+` operator _can_ be used on them. +It does not add, but it _concatenates_—it glues two strings together. +The following line will produce the string `"concatenate"`: + +``` +"con" + "cat" + "e" + "nate" +``` + +There are more ways of manipulating strings, which we will discuss +when we get to methods in [Chapter 4](04_data.html#methods). + +## Unary operators + +{{index operator, "typeof operator", type}} + +Not all operators are +symbols. Some are written as words. One example is the `typeof` +operator, which produces a string value naming the type of the value +you give it. + +``` +console.log(typeof 4.5) +// → number +console.log(typeof "x") +// → string +``` + +{{id "console.log"}} + +{{index "console.log", output, "JavaScript console"}} + +We will use +`console.log` in example code to indicate that we want to see the +result of evaluating something. When you run such code, the value +produced should be shown on the screen, though how it appears will +depend on the JavaScript environment you use to run it. + +{{index negation, "- operator", "binary operator", "unary operator"}} + +The other operators we saw all operated on two values, but +`typeof` takes only one. Operators that use two values are called +_binary_ operators, while those that take one are called _unary_ +operators. The minus operator can be used both as a binary operator +and as a unary operator. + +``` +console.log(- (10 - 2)) +// → -8 +``` + +## Boolean values + +{{index Boolean, operator, true, false, bit}} + +Often, +you will need a value that simply distinguishes between two +possibilities, like “yes” and “no” or “on” and “off”. For this, +JavaScript has a _Boolean_ type, which has just two values: true and +false (which are written simply as those words). 
+ +### Comparisons + +{{index comparison}} + +Here is one way to produce Boolean values: + +``` +console.log(3 > 2) +// → true +console.log(3 < 2) +// → false +``` + +{{index [comparison, "of numbers"], "> operator", "< operator", "greater than", "less than"}} + +The `>` and `<` signs are the traditional +symbols for “is greater than” and “is less than”, respectively. They +are binary operators. Applying them results in a Boolean value that +indicates whether they hold true in this case. + +Strings can be compared in the same way. + +``` +console.log("Aardvark" < "Zoroaster") +// → true +``` + +{{index [comparison, "of strings"]}} + +The way strings are ordered is more or less +alphabetic: uppercase letters are always “less” than lowercase ones, +so `"Z" < "a"` is true, and non-alphabetic characters (!, -, and so on) +are also included in the ordering. The actual comparison is based on +the _((Unicode))_ standard. This standard assigns a number to +virtually every character you would ever need, including characters +from Greek, Arabic, Japanese, Tamil, and so on. Having such numbers is +useful for storing strings inside a computer because it makes it +possible to represent them as a sequence of numbers. When comparing +strings, JavaScript goes over them from left to right, comparing the +numeric codes of the characters one by one. + +{{index equality, ">= operator", "pass:[<=] operator", "== operator", "!= operator"}} + +Other similar operators are `>=` (greater +than or equal to), `<=` (less than or equal to), `==` (equal to), and +`!=` (not equal to). + +``` +console.log("Itchy" != "Scratchy") +// → true +``` + +{{index [comparison, "of NaN"], NaN}} + +There is only one value in JavaScript +that is not equal to itself, and that is `NaN`, which stands for “not +a number”. 
+ +``` +console.log(NaN == NaN) +// → false +``` + +`NaN` is supposed to denote the result of a nonsensical computation, +and as such, it isn't equal to the result of any _other_ nonsensical +computations. + +### Logical operators + +{{index reasoning, "logical operators"}} + +There are also some operations +that can be applied to Boolean values themselves. JavaScript supports +three logical operators: _and_, _or_, and _not_. These can be used to +“reason” about Booleans. + +{{index "&& operator", "logical and"}} + +The `&&` operator represents logical +_and_. It is a binary operator, and its result is true only if both +the values given to it are true. + +``` +console.log(true && false) +// → false +console.log(true && true) +// → true +``` + +{{index "|| operator", "logical or"}} + +The `||` operator denotes logical +_or_. It produces true if either of the values given to it is true. + +``` +console.log(false || true) +// → true +console.log(false || false) +// → false +``` + +{{index negation, "! operator"}} + +_Not_ is written as an exclamation mark +(`!`). It is a unary operator that flips the value given to it—`!true` +produces `false` and `!false` gives `true`. + +{{index precedence}} + +When mixing these Boolean operators with arithmetic +and other operators, it is not always obvious when parentheses are +needed. In practice, you can usually get by with knowing that of the +operators we have seen so far, `||` has the lowest precedence, then +comes `&&`, then the comparison operators (`>`, `==`, and so on), and +then the rest. This order has been chosen such that, in typical +expressions like the following one, as few parentheses as possible are +necessary: + +``` +1 + 1 == 2 && 10 * 10 > 50 +``` + +{{index "conditional execution", "ternary operator", "?: operator", "conditional operator", "colon character", "question mark"}} + +The last logical operator I will discuss is not unary, not +binary, but _ternary_, operating on three values. 
It is written with a +question mark and a colon, like this: + +``` +console.log(true ? 1 : 2); +// → 1 +console.log(false ? 1 : 2); +// → 2 +``` + +This one is called the _conditional_ operator (or sometimes just +_ternary_ operator since it is the only such operator in the +language). The value on the left of the question mark “picks” which of +the other two values will come out. When it is true, the middle value +is chosen, and when it is false, the value on the right comes out. + +## Undefined values + +{{index undefined, null}} + +There are two special values, written `null` +and `undefined`, that are used to denote the absence of a meaningful +value. They are themselves values, but they carry no +information. + +Many operations in the language that don't produce a meaningful value +(you'll see some later) yield `undefined` simply because they have to +yield _some_ value. + +The difference in meaning between `undefined` and `null` is an accident +of JavaScript's design, and it doesn't matter most of the time. In the cases +where you actually have to concern yourself with these values, I +recommend treating them as interchangeable (more on that in a moment). + +## Automatic type conversion + +{{index NaN, "type coercion"}} + +In the introduction, I mentioned that +JavaScript goes out of its way to accept almost any program you give +it, even programs that do odd things. This is nicely demonstrated by +the following expressions: + +``` +console.log(8 * null) +// → 0 +console.log("5" - 1) +// → 4 +console.log("5" + 1) +// → 51 +console.log("five" * 2) +// → NaN +console.log(false == 0) +// → true +``` + +{{index "+ operator", arithmetic, "pass:[*] operator", "- operator"}} + +When an operator is applied to the “wrong” type of value, +JavaScript will quietly convert that value to the type it wants, using +a set of rules that often aren't what you want or expect. This is +called _((type coercion))_. 
So the `null` in the first expression becomes +`0`, and the `"5"` in the second expression becomes `5` (from string +to number). Yet in the third expression, `+` tries string +concatenation before numeric addition, so the `1` is converted to +`"1"` (from number to string). + +{{index "type coercion", [number, "conversion to"]}} + +When something that +doesn't map to a number in an obvious way (such as `"five"` or +`undefined`) is converted to a number, the value `NaN` is produced. +Further arithmetic operations on `NaN` keep producing `NaN`, so if you +find yourself getting one of those in an unexpected place, look for +accidental type conversions. + +{{index null, undefined, [comparison, "of undefined values"], "== operator"}} + +When comparing values of the same type using `==`, the +outcome is easy to predict: you should get true when both values are +the same, except in the case of `NaN`. But when the types differ, +JavaScript uses a complicated and confusing set of rules to determine +what to do. In most cases, it just tries to convert one of the values +to the other value's type. However, when `null` or `undefined` occurs +on either side of the operator, it produces true only if both sides +are one of `null` or `undefined`. + +``` +console.log(null == undefined); +// → true +console.log(null == 0); +// → false +``` + +That last piece of behavior is often useful. When you want to test +whether a value has a real value instead of `null` or `undefined`, you +can simply compare it to `null` with the `==` (or `!=`) operator. + +{{index "type coercion", [Boolean, "conversion to"], "=== operator", "!== operator", comparison}} + +But what if you want to test whether +something refers to the precise value `false`? The rules for +converting strings and numbers to Boolean values state that `0`, +`NaN`, and the empty string (`""`) count as `false`, while all the +other values count as `true`. 
Because of this, expressions like `0 == +false` and `"" == false` are also true. For cases like this, where you +do _not_ want any automatic type conversions to happen, there are two +extra operators: `===` and `!==`. The first tests whether a value is +precisely equal to the other, and the second tests whether it is not +precisely equal. So `"" === false` is false as expected. + +I recommend using the three-character comparison operators defensively to +prevent unexpected type conversions from tripping you up. But when you're +certain the types on both sides will be the same, there is no problem with +using the shorter operators. + +### Short-circuiting of logical operators + +{{index "type coercion", [Boolean, "conversion to"], operator}} + +The +logical operators `&&` and `||` handle values of different types in a +peculiar way. They will convert the value on their left side to +Boolean type in order to decide what to do, but depending on the +operator and the result of that conversion, they return either the +_original_ left-hand value or the right-hand value. + +{{index "|| operator"}} + +The `||` operator, for example, will return the value +to its left when that can be converted to true and will return the +value on its right otherwise. This conversion works as you'd expect +for Boolean values and should do something analogous for values of +other types. + +``` +console.log(null || "user") +// → user +console.log("Karl" || "user") +// → Karl +``` + +{{index "default value"}} + +This functionality allows the `||` operator to be +used as a way to fall back on a default value. If you give it an +expression that might produce an empty value on the left, the value on +the right will be used as a replacement in that case. + +{{index "&& operator"}} + +The `&&` operator works similarly, but the other way +around. When the value to its left is something that converts to +false, it returns that value, and otherwise it returns the value on +its right. 
+ +{{index "short-circuit evaluation"}} + +Another important property of these two +operators is that the expression to their right is evaluated only when +necessary. In the case of `true || X`, no matter what `X` is—even if +it's an expression that does something _terrible_—the result will be +true, and `X` is never evaluated. The same goes for `false && X`, +which is false and will ignore `X`. This is called _short-circuit +evaluation_. + +{{index "ternary operator", "?: operator", "conditional operator"}} + +The +conditional operator works in a similar way. The first expression is +always evaluated, but the second or third value, the one that is not +picked, is not. + +## Summary + +We looked at four types of JavaScript values in this chapter: numbers, +strings, Booleans, and undefined values. + +Such values are created by typing in their name (`true`, `null`) or +value (`13`, `"abc"`). You can combine and transform values with +operators. We saw binary operators for arithmetic (`+`, `-`, `*`, `/`, +and `%`), string concatenation (`+`), comparison (`==`, `!=`, `===`, +`!==`, `<`, `>`, `<=`, `>=`), and logic (`&&`, `||`), as well as +several unary operators (`-` to negate a number, `!` to negate +logically, and `typeof` to find a value's type) and a ternary +operator (`?:`) to pick one of two values based on a third value. + +This gives you enough information to use JavaScript as a pocket +calculator, but not much more. The +[next chapter](02_program_structure.html#program_structure) will +start tying these expressions together into basic programs. 
+ diff --git a/02_program_structure.md b/02_program_structure.md new file mode 100644 index 000000000..fb119372e --- /dev/null +++ b/02_program_structure.md @@ -0,0 +1,1125 @@ +{{meta {chap_num: 2, prev_link: 01_values, next_link: 03_functions}}} + +# Program Structure + +{{quote {author: "_why", title: "Why's (Poignant) Guide to Ruby", chapter: true} + +And my heart glows bright red under my +filmy, translucent skin and they have to administer 10cc of JavaScript +to get me to come back. (I respond well to toxins in the blood.) Man, +that stuff will kick the peaches right out your gills! + +quote}} + +{{index why, "Poignant Guide"}} + +In this chapter, we will start to do +things that can actually be called _programming_. We will expand our +command of the JavaScript language beyond the nouns and sentence +fragments we've seen so far, to the point where we can +express some meaningful prose. + +## Expressions and statements + +{{index grammar, syntax, [code, "structure of"], grammar, [JavaScript, syntax]}} + +In +[Chapter 1](01_values.html#values), we made some values and then +applied operators to them to get new values. Creating values like this +is an essential part of every JavaScript program, but it is only +a part. + +{{index "literal expression"}} + +A fragment of code that produces a value is +called an _((expression))_. Every value that is written literally +(such as `22` or `"psychoanalysis"`) is an expression. An expression +between ((parentheses)) is also an expression, as is a ((binary +operator)) applied to two expressions or a unary operator applied to +one. + +{{index [nesting, "of expressions"], "human language"}} + +This shows part of the +beauty of a language-based interface. Expressions can nest in a way +very similar to the way subsentences in human languages are nested—a +subsentence can contain its own subsentences, and so on. This allows +us to combine expressions to express arbitrarily complex computations. 
+ +{{index statement, semicolon, program}} + +If an expression +corresponds to a sentence fragment, a JavaScript _statement_ +corresponds to a full sentence in a human language. A program is +simply a list of statements. + +{{index syntax}} + +The simplest kind of statement is an expression with a +semicolon after it. This is a program: + +``` +1; +!false; +``` + +It is a useless program, though. An ((expression)) can be content to +just produce a value, which can then be used by the enclosing +expression. A ((statement)) stands on its own and amounts to something +only if it affects the world. It could display something on the +screen—that counts as changing the world—or it could change the +internal state of the machine in a way that will affect the statements +that come after it. These changes are called _((side effect))s_. The +statements in the previous example just produce the values `1` and +`true` and then immediately throw them away. This leaves no impression +on the world at all. When executing the program, nothing observable +happens. + +{{index "programming style", "automatic semicolon insertion", semicolon}} + +In some cases, JavaScript allows you to +omit the semicolon at the end of a statement. In other cases, it has +to be there, or the next ((line)) will be treated as part of the same +statement. The rules for when it can be safely omitted are somewhat +complex and error-prone. In this book, every statement that needs a +semicolon will always be terminated by one. I recommend you do the +same in your own programs, at least until you've learned more about the +subtleties involved in leaving out semicolons. + +## Variables + +{{index syntax, [variable, definition], "side effect", memory}} + +How +does a program keep an internal ((state))? How does it remember +things? We have seen how to produce new values from old values, but +this does not change the old values, and the new value has to be +immediately used or it will dissipate again.
To catch and hold values, +JavaScript provides a thing called a _variable_. + +``` +var caught = 5 * 5; +``` + +{{index "var keyword"}} + +And that gives us our second kind of ((statement)). +The special word (_((keyword))_) `var` indicates that this sentence is +going to define a variable. It is followed by the name of the variable +and, if we want to immediately give it a value, by an `=` operator and +an expression. + +The previous statement creates a variable called `caught` and uses it +to grab hold of the number that is produced by multiplying 5 by 5. + +After a variable has been defined, its name can be used as an +((expression)). The value of such an expression is the value the +variable currently holds. Here's an example: + +``` +var ten = 10; +console.log(ten * ten); +// → 100 +``` + +{{index "underscore character", "dollar sign", [variable, naming]}} + +Variable names can be any word that isn't +a reserved word (such as `var`). They may not include spaces. +Digits can also be part of variable names—`catch22` is a valid name, +for example—but the name must not start with a digit. A variable name +cannot include punctuation, except for the characters `$` and `_`. + +{{index "= operator", assignment, [variable, assignment]}} + +When a +variable points at a value, that does not mean it is tied to that +value forever. The `=` operator can be used at any time on existing +variables to disconnect them from their current value and have them +point to a new one. + +``` +var mood = "light"; +console.log(mood); +// → light +mood = "dark"; +console.log(mood); +// → dark +``` + +{{index [variable, "model of"], "tentacle (analogy)"}} + +You should +imagine variables as tentacles, rather than boxes. They do not +_contain_ values; they _grasp_ them—two variables can refer to the +same value. A program can access only the values that it still has a +hold on. 
When you need to remember something, you grow a tentacle to +hold on to it or you reattach one of your existing tentacles to it. + +{{figure {url: "img/octopus.jpg", alt: "Variables as tentacles"}}} + +Let's look at an example. To remember the number of dollars that Luigi +still owes you, you create a variable. And then when he pays back $35, +you give this variable a new value. + +``` +var luigisDebt = 140; +luigisDebt = luigisDebt - 35; +console.log(luigisDebt); +// → 105 +``` + +{{index undefined}} + +When you define a variable without giving it a value, +the tentacle has nothing to grasp, so it ends in thin air. If you ask +for the value of an empty variable, you'll get the value `undefined`. + +{{index "var keyword"}} + +A single `var` statement may define multiple +variables. The definitions must be separated by commas. + +``` +var one = 1, two = 2; +console.log(one + two); +// → 3 +``` + +## Keywords and reserved words + +{{index syntax, "implements (reserved word)", "interface (reserved word)", "let keyword", "package (reserved word)", "private (reserved word)", "protected (reserved word)", "public (reserved word)", "static (reserved word)", "void operator", "yield (reserved word)", "reserved word", [variable, naming]}} + +Words with +a special meaning, such as `var`, are _((keyword))s_, and they may not +be used as variable names. There are also a number of words that are +“reserved for use” in ((future)) versions of JavaScript. These are also +officially not allowed to be used as variable names, though some +JavaScript environments do allow them. The full list of keywords and +reserved words is rather long. 
+ +```text/plain +break case catch class const continue debugger +default delete do else enum export extends false +finally for function if implements import in +instanceof interface let new null package private +protected public return static super switch this +throw true try typeof var void while with yield +``` + +Don't worry about memorizing these, but remember that this might be +the problem when a variable definition does not work as expected. + +## The environment + +{{index "standard environment"}} + +The collection of variables and their values +that exist at a given time is called the _((environment))_. When a +program starts up, this environment is not empty. It always contains +variables that are part of the language ((standard)), and most of the +time, it has variables that provide ways to interact with the +surrounding system. For example, in a ((browser)), there are variables +and functions to inspect and influence the currently loaded website +and to read ((mouse)) and ((keyboard)) input. + +## Functions + +{{index output, function, [function, application], "alert function", "message box"}} + +{{indexsee "application (of functions)", "function application"}} + +{{indexsee "invoking (of functions)", "function application"}} + +{{indexsee "calling (of functions)", "function application"}} + +A lot of the values provided in the +default environment have the type _((function))_. A function is a +piece of program wrapped in a value. Such values can be _applied_ in +order to run the wrapped program. For example, in a ((browser)) +environment, the variable `alert` holds a function that shows a little +((dialog box)) with a message. It is used like this: + +``` +alert("Good morning!"); +``` + +{{figure {url: "img/alert.png", alt: "An alert dialog",width: "8cm"}}} + +{{index parameter, [function, application]}} + +Executing a function is +called _invoking_, _calling_, or _applying_ it. 
You can call a +function by putting ((parentheses)) after an expression that produces a +function value. Usually you'll directly use the name of the variable +that holds the function. The values between the parentheses are given to +the program inside the function. In the example, the `alert` function +uses the string that we give it as the text to show in the dialog box. +Values given to functions are called _((argument))s_. The `alert` +function needs only one of them, but other functions might need a +different number or different types of arguments. + +## The console.log function + +{{index "JavaScript console", "developer tools", "Node.js", "console.log", output}} + +The `alert` function +can be useful as an output device when experimenting, but clicking +away all those little windows will get on your nerves. In past +examples, we've used `console.log` to output values. Most JavaScript +systems (including all modern web ((browser))s and Node.js) provide a +`console.log` function that writes out its arguments to _some_ text +output device. In browsers, the output lands in the ((JavaScript +console)). This part of the browser interface is hidden by default, +but most browsers open it when you press F12 or, on Mac, when you +press Command-Option-I. If that does not work, search through the +menus for an item named “web console” or “developer tools”. + +{{if interactive + +When running the examples, or your own code, on the pages of this +book, `console.log` output will be shown after the example, instead of +in the browser's JavaScript console. + +if}} + +``` +var x = 30; +console.log("the value of x is", x); +// → the value of x is 30 +``` + +{{index object}} + +Though ((variable)) names cannot contain ((period +character))s, `console.log` clearly has one. This is because +`console.log` isn't a simple variable. It is actually an expression +that retrieves the `log` ((property)) from the value held by the +`console` variable. 
We will find out exactly what this means in +[Chapter 4](04_data.html#properties). + +{{id return_values}} +## Return values + +{{index [comparison, "of numbers"], "return value", "Math.max function", maximum}} + +Showing a dialog box or writing text to +the screen is a _((side effect))_. A lot of functions are useful +because of the side effects they produce. Functions may also produce +values, and in that case, they don't need to have a side effect to be +useful. For example, the function `Math.max` takes any number of +number values and gives back the greatest. + +``` +console.log(Math.max(2, 4)); +// → 4 +``` + +{{index [function, application], minimum, "Math.min function"}} + +When a function produces a value, it is said to _return_ +that value. Anything that produces a value is an ((expression)) in +JavaScript, which means function calls can be used within larger +expressions. Here a call to `Math.min`, which is the opposite of +`Math.max`, is used as an input to the plus operator: + +``` +console.log(Math.min(2, 4) + 100); +// → 102 +``` + +The [next chapter](03_functions.html#functions) explains how to +write your own functions. + +## prompt and confirm + +{{index "dialog box", input, browser, "confirm function"}} + +Browser +environments contain other functions besides `alert` for popping up +windows. You can ask the user an OK/Cancel question using +`confirm`. This returns a Boolean: `true` if the user clicks OK and +`false` if the user clicks Cancel. + +``` +confirm("Shall we, then?"); +``` + +{{figure {url: "img/confirm.png", alt: "A confirm dialog",width: "8cm"}}} + +{{index input, "prompt function", "text input"}} + +The `prompt` function +can be used to ask an “open” question. The first argument is the +question, the second one is the text that the user starts with. A line +of text can be typed into the dialog window, and the function will +return this text as a string. 
+ +``` +prompt("Tell me everything you know.", "..."); +``` + +{{figure {url: "img/prompt.png", alt: "A prompt dialog",width: "8cm"}}} + +These two functions aren't used much in modern web programming, mostly +because you have no control over the way the resulting windows look, +but they are useful for toy programs and experiments. + +## Control flow + +{{index "execution order", program, "control flow"}} + +When your program +contains more than one ((statement)), the statements are executed, +predictably, from top to bottom. As a basic example, this program has +two statements. The first one asks the user for a number, and the +second, which is executed afterward, shows the ((square)) of that +number. + +``` +var theNumber = Number(prompt("Pick a number", "")); +alert("Your number is the square root of " + + theNumber * theNumber); +``` + +{{index [number, "conversion to"], "type coercion", "Number function", "String function", "Boolean function", [Boolean, "conversion to"]}} + +The function `Number` converts a +value to a number. We need that conversion because the result of +`prompt` is a string value, and we want a number. There are similar +functions called `String` and `Boolean` that convert values to those +types. + +Here is the rather trivial schematic representation of straight +control flow: + +{{figure {url: "img/controlflow-straight.svg", alt: "Trivial control flow",width: "4cm"}}} + +## Conditional execution + +{{index Boolean, "control flow"}} + +Executing statements in straight-line +order isn't the only option we have. An alternative is _((conditional +execution))_, where we choose between two different routes based on a +Boolean value, like this: + +{{figure {url: "img/controlflow-if.svg", alt: "Conditional control flow",width: "4cm"}}} + +{{index syntax, "Number function", "if keyword"}} + +Conditional execution +is written with the `if` keyword in JavaScript.
In the simple case, we +just want some code to be executed if, and only if, a certain +condition holds. For example, in the previous program, we might want +to show the square of the input only if the input is actually a +number. + +``` +var theNumber = Number(prompt("Pick a number", "")); +if (!isNaN(theNumber)) + alert("Your number is the square root of " + + theNumber * theNumber); +``` + +With this modification, if you enter “cheese”, no output will be shown. + +The keyword `if` executes or skips a statement depending on the value +of a Boolean expression. The deciding expression is written after the +keyword, between ((parentheses)), followed by the statement to execute. + +{{index "isNaN function"}} + +The `isNaN` function is a standard JavaScript +function that returns `true` only if the argument it is given is +`NaN`. The `Number` function happens to return `NaN` when you give it +a string that doesn't represent a valid number. Thus, the condition +translates to “unless `theNumber` is not-a-number, do this”. + +{{index "else keyword"}} + +You often won't just have code that executes when a +condition holds true, but also code that handles the other case. This +alternate path is represented by the second arrow in the +diagram. The `else` keyword can be used, together with `if`, to create +two separate, alternative execution paths. + +``` +var theNumber = Number(prompt("Pick a number", "")); +if (!isNaN(theNumber)) + alert("Your number is the square root of " + + theNumber * theNumber); +else + alert("Hey. Why didn't you give me a number?"); +``` + +{{index ["if keyword", chaining]}} + +If we have more than two paths to choose +from, multiple `if`/`else` pairs can be “chained” together. Here's an +example: + +``` +var num = Number(prompt("Pick a number", "0")); + +if (num < 10) + alert("Small"); +else if (num < 100) + alert("Medium"); +else + alert("Large"); +``` + +The program will first check whether `num` is less than 10. 
If it is, +it chooses that branch, shows `"Small"`, and is done. If it isn't, it +takes the `else` branch, which itself contains a second `if`. If the +second condition (`< 100`) holds, that means the number is between 10 +and 100, and `"Medium"` is shown. If it doesn't, the second, and last, +`else` branch is chosen. + +The flow chart for this program looks something like this: + +{{figure {url: "img/controlflow-nested-if.svg", alt: "Nested if control flow",width: "4cm"}}} + +{{id loops}} +## while and do loops + +{{index "even number"}} + +Consider a program that prints all even numbers from +0 to 12. One way to write this is as follows: + +``` +console.log(0); +console.log(2); +console.log(4); +console.log(6); +console.log(8); +console.log(10); +console.log(12); +``` + +{{index "control flow"}} + +That works, but the idea of writing a program is to +make something _less_ work, not more. If we needed all even numbers +less than 1,000, the previous would be unworkable. What we need is a +way to repeat some code. This form of control flow is called a +_((loop))_: + +{{figure {url: "img/controlflow-loop.svg", alt: "Loop control flow",width: "4cm"}}} + +{{index syntax, "counter variable"}} + +Looping control flow allows us to go +back to some point in the program where we were before and repeat it +with our current program state. If we combine this with a variable +that counts, we can do something like this: + +``` +var number = 0; +while (number <= 12) { + console.log(number); + number = number + 2; +} +// → 0 +// → 2 +// … etcetera +``` + +{{index "while loop", Boolean}} + +A ((statement)) starting with the +keyword `while` creates a loop. The word `while` is followed by an +((expression)) in ((parentheses)) and then a statement, much like `if`. +The loop executes that statement as long as the expression produces a +value that is `true` when converted to Boolean type. 
+ +{{index grouping, "{} (block)", block}} + +In this loop, we want to both +print the current number and add two to our variable. Whenever we need +to execute multiple ((statement))s inside a loop, we wrap them in +((curly braces)) (`{` and `}`). Braces do for statements what +((parentheses)) do for expressions: they group them together, making +them count as a single statement. A sequence of statements wrapped in +braces is called a _block_. + +{{index "programming style"}} + +Many JavaScript programmers wrap every single +loop or `if` body in braces. They do this both for the sake of +consistency and to avoid having to add or remove braces when changing +the number of statements in the body later. In this book, I will write +most single-statement bodies without braces, since I value brevity. +You are free to go with whichever style you prefer. + +{{index comparison, state}} + +The variable `number` demonstrates the way +a ((variable)) can track the progress of a program. Every time the +loop repeats, `number` is incremented by `2`. Then, at the beginning +of every repetition, it is compared with the number `12` to decide +whether the program has done all the work it intended to do. + +{{index exponentiation}} + +As an example that actually does something useful, +we can now write a program that calculates and shows the value of +2^10^ (2 to the 10th power). We use two variables: one to keep +track of our result and one to count how often we have multiplied this +result by 2. The loop tests whether the second variable has reached 10 +yet and then updates both variables. + +``` +var result = 1; +var counter = 0; +while (counter < 10) { + result = result * 2; + counter = counter + 1; +} +console.log(result); +// → 1024 +``` + +The counter could also start at `1` and check for `<= 10`, but, for +reasons that will become apparent in +[Chapter 4](04_data.html#array_indexing), it is a good idea to get +used to counting from 0. 
+ +{{index "loop body", "do loop", "control flow"}} + +The `do` loop is a +control structure similar to the `while` loop. It differs only on one +point: a `do` loop always executes its body at least once, and it +starts testing whether it should stop only after that first execution. +To reflect this, the test appears after the body of the loop: + +``` +do { + var yourName = prompt("Who are you?"); +} while (!yourName); +console.log(yourName); +``` + +{{index [Boolean, "conversion to"], "! operator"}} + +This program will +force you to enter a name. It will ask again and again until it gets +something that is not an empty string. Applying the `!` operator will +convert a value to Boolean type before negating it, and all strings +except `""` convert to `true`. This means the loop continues going round +until you provide a name that is not the empty string. + +## Indenting Code + +{{index block, "code structure", whitespace, "programming style"}} + +You've probably noticed the spaces I put in front of some +statements. In JavaScript, these are not required—the computer will +accept the program just fine without them. In fact, even the ((line)) +breaks in programs are optional. You could write a program as a single +long line if you felt like it. The role of the ((indentation)) inside +blocks is to make the structure of the code stand out. In complex +code, where new blocks are opened inside other blocks, it can become +hard to see where one block ends and another begins. With proper +indentation, the visual shape of a program corresponds to the shape of +the blocks inside it. I like to use two spaces for every open block, +but tastes differ—some people use four spaces, and some people use +((tab character))s. + +## for loops + +{{index syntax, "while loop", "counter variable"}} + +Many loops follow +the pattern seen in the previous `while` examples. First, a “counter” +variable is created to track the progress of the loop. 
Then comes a +`while` loop, whose test expression usually checks whether the counter +has reached some boundary yet. At the end of the loop body, the +counter is updated to track progress. + +{{index "for loop", loop}} + +Because this pattern is so common, JavaScript and +similar languages provide a slightly shorter and more comprehensive +form, the `for` loop. + +``` +for (var number = 0; number <= 12; number = number + 2) + console.log(number); +// → 0 +// → 2 +// … etcetera +``` + +{{index "control flow", state}} + +This program is exactly equivalent to the +[earlier](02_program_structure.html#loops) even-number-printing +example. The only change is that all the ((statement))s that are +related to the “state” of the loop are now grouped together. + +The ((parentheses)) after a `for` keyword must contain two +((semicolon))s. The part before the first semicolon _initializes_ the +loop, usually by defining a ((variable)). The second part is the +((expression)) that _checks_ whether the loop must continue. The final +part _updates_ the state of the loop after every iteration. In most +cases, this is shorter and clearer than a `while` construct. + +{{index exponentiation}} + +Here is the code that computes 2^10^, using `for` +instead of `while`: + +``` +var result = 1; +for (var counter = 0; counter < 10; counter = counter + 1) + result = result * 2; +console.log(result); +// → 1024 +``` + +{{index "programming style", indentation}} + +Note that even though no block +is opened with a `{`, the statement in the loop is still indented two +spaces to make it clear that it “belongs” to the line before it. + +## Breaking Out of a Loop + +{{index [loop, "termination of"], "break keyword"}} + +Having the loop's +condition produce `false` is not the only way a loop can finish. There +is a special statement called `break` that has the effect of +immediately jumping out of the enclosing loop. + +This program illustrates the `break` statement. 
It finds the first number +that is both greater than or equal to 20 and divisible by 7. + +``` +for (var current = 20; ; current++) { + if (current % 7 == 0) + break; +} +console.log(current); +// → 21 +``` + +{{index "remainder operator", "% operator"}} + +Using the remainder +(`%`) operator is an easy way to test whether a number is divisible by +another number. If it is, the remainder of their division is zero. + +{{index "for loop"}} + +The `for` construct in the example does not have a part +that checks for the end of the loop. This means that the loop will +never stop unless the `break` statement inside is executed. + +If you were to leave out that `break` statement or accidentally write +a condition that always produces `true`, your program would get stuck +in an _((infinite loop))_. A program stuck in an infinite loop will +never finish running, which is usually a bad thing. + +{{if interactive + +If you create an infinite loop in one of the examples on these pages, +you'll usually be asked whether you want to stop the script after a +few seconds. If that fails, you will have to close the tab that you're +working in, or on some browsers close your whole browser, in order to +recover. + +if}} + +{{index "continue keyword"}} + +The `continue` keyword is similar to `break`, in +that it influences the progress of a loop. When `continue` is +encountered in a loop body, control jumps out of the body and +continues with the loop's next iteration. + +## Updating variables succinctly + +{{index assignment, "+= operator", "-= operator", "/= operator", "*= operator", state, "side effect"}} + +Especially +when looping, a program often needs to “update” a variable to hold a +value based on that variable's previous value. 
+ +{{test no}} + +``` +counter = counter + 1; +``` + +JavaScript provides a shortcut for this: + +{{test no}} + +``` +counter += 1; +``` + +Similar shortcuts work for many other operators, such as `result *= 2` to +double `result` or `counter -= 1` to count downward. + +This allows us to shorten our counting example a little more. + +``` +for (var number = 0; number <= 12; number += 2) + console.log(number); +``` + +{{index "++ operator", "-- operator"}} + +For `counter += 1` and `counter -= +1`, there are even shorter equivalents: `counter++` and `counter--`. + +## Dispatching on a value with switch + +{{index syntax, "conditional execution", dispatching, ["if keyword", chaining]}} + +It is common for code to look like this: + +{{test no}} + +``` +if (variable == "value1") action1(); +else if (variable == "value2") action2(); +else if (variable == "value3") action3(); +else defaultAction(); +``` + +{{index "colon character", "switch keyword"}} + +There is a construct called +`switch` that is intended to solve such a “dispatch” in a more direct +way. Unfortunately, the syntax JavaScript uses for this (which it +inherited from the C/Java line of programming languages) is somewhat +awkward—a chain of `if` statements often looks better. Here is an +example: + +``` +switch (prompt("What is the weather like?")) { + case "rainy": + console.log("Remember to bring an umbrella."); + break; + case "sunny": + console.log("Dress lightly."); + case "cloudy": + console.log("Go outside."); + break; + default: + console.log("Unknown weather type!"); + break; +} +``` + +{{index fallthrough, comparison, "break keyword", "case keyword", "default keyword"}} + +You may put any number of `case` labels +inside the block opened by `switch`. The program will jump to the +label that corresponds to the value that `switch` was given or to +`default` if no matching value is found. It starts executing +statements there, even if they're under another label, until it +reaches a `break` statement. 
In some cases, such as the `"sunny"` case +in the example, this can be used to share some code between cases (it +recommends going outside for both sunny and cloudy weather). But +beware: it is easy to forget such a `break`, which will cause the +program to execute code you do not want executed. + +## Capitalization + +{{index capitalization, [variable, naming], whitespace}} + +Variable +names may not contain spaces, yet it is often helpful to use multiple +words to clearly describe what the variable represents. These are +pretty much your choices for writing a variable name with several +words in it: + +```null +fuzzylittleturtle +fuzzy_little_turtle +FuzzyLittleTurtle +fuzzyLittleTurtle +``` + +{{index "camel case", "programming style", "underscore character"}} + +The +first style can be hard to read. Personally, I like the look of the +underscores, though that style is a little painful to type. The +((standard)) JavaScript functions, and most JavaScript programmers, +follow the bottom style—they capitalize every word except the first. +It is not hard to get used to little things like that, and code with +mixed naming styles can be jarring to read, so we will just follow +this ((convention)). + +{{index "Number function", constructor}} + +In a few cases, such as the +`Number` function, the first letter of a variable is also capitalized. +This was done to mark this function as a constructor. What a +constructor is will become clear in +[Chapter 6](06_object.html#constructors). For now, the important +thing is not to be bothered by this apparent lack of ((consistency)). + +## Comments + +{{index readability}} + +Often, raw code does not convey all the information +you want a program to convey to human readers, or it conveys it in +such a cryptic way that people might not understand it. At other +times, you might just feel poetic or want to include some thoughts as +part of your program. This is what _((comment))s_ are for. 
+ +{{index "slash character", "line comment"}} + +A comment is a piece of text +that is part of a program but is completely ignored by the computer. +JavaScript has two ways of writing comments. To write a single-line +comment, you can use two slash characters (`//`) and then the comment +text after it. + +{{test no}} + +``` +var accountBalance = calculateBalance(account); +// It's a green hollow where a river sings +accountBalance.adjust(); +// Madly catching white tatters in the grass. +var report = new Report(); +// Where the sun on the proud mountain rings: +addToReport(accountBalance, report); +// It's a little valley, foaming like light in a glass. +``` + +{{index "block comment"}} + +A `//` comment goes only to the end of the line. A +section of text between `/*` and `*/` will be ignored, regardless of +whether it contains line breaks. This is often useful for adding +blocks of information about a file or a chunk of program. + +``` +/* + I first found this number scrawled on the back of one of + my notebooks a few years ago. Since then, it has often + dropped by, showing up in phone numbers and the serial + numbers of products that I've bought. It obviously likes + me, so I've decided to keep it. +*/ +var myNumber = 11213; +``` + +## Summary + +You now know that a program is built out of statements, which +themselves sometimes contain more statements. Statements tend to +contain expressions, which themselves can be built out of smaller +expressions. + +Putting statements after one another gives you a program that is +executed from top to bottom. You can introduce disturbances in the +flow of control by using conditional (`if`, `else`, and `switch`) and +looping (`while`, `do`, and `for`) statements. + +Variables can be used to file pieces of data under a name, and they +are useful for tracking state in your program. The environment is the +set of variables that are defined. 
JavaScript systems +always put a number of useful standard variables into your +environment. + +Functions are special values that encapsulate a piece of program. You +can invoke them by writing `functionName(argument1, argument2)`. Such +a function call is an expression, and may produce a value. + +## Exercises + +{{index exercises}} + +If you are unsure how to try your solutions to +exercises, refer to the [introduction](00_intro.html#intro). + +Each exercise starts with a problem description. Read that and try to +solve the exercise. If you run into problems, consider reading the +hints (!interactive after the exercise!)(!book at the [end of the book](hints.html#hints)!). +Full solutions to the exercises are not included in this +book, but you can find them online at +[_eloquentjavascript.net/code_](http://eloquentjavascript.net/code). +If you want to learn something from the exercises, I recommend looking +at the solutions only after you've solved the exercise, or at least +after you've attacked it long and hard enough to have a slight +headache. + +### Looping a triangle + +{{index "triangle (exercise)"}} + +Write a ((loop)) that makes seven calls to +`console.log` to output the following triangle: + +```null +# +## +### +#### +##### +###### +####### +``` + +It may be useful to know that you can find the length of a string by +writing `.length` after it. + +``` +var abc = "abc"; +console.log(abc.length); +// → 3 +``` + +{{if interactive + +Most exercises contain a piece of code that you can modify to solve +the exercise. Remember that you can click code blocks to edit them. + +``` +// Your code here. +``` +if}} + +{{hint + +{{index "triangle (exercise)"}} + +You can start with a program that simply +prints out the numbers 1 to 7, which you can derive by making a few +modifications to the +[even number printing example](02_program_structure.html#loops) +given earlier in the chapter, where the `for` loop was introduced.
+ +Now consider the equivalence between numbers and strings of hash +characters. You can go from 1 to 2 by adding 1 (`+= 1`). You can go +from `"#"` to `"##"` by adding a character (`+= "#"`). Thus, your +solution can closely follow the number-printing program. + +hint}} + +### FizzBuzz + +{{index "FizzBuzz (exercise)", loop, "conditional execution"}} + +Write a +program that uses `console.log` to print all the numbers from 1 to +100, with two exceptions. For numbers divisible by 3, print `"Fizz"` +instead of the number, and for numbers divisible by 5 (and not 3), +print `"Buzz"` instead. + +When you have that working, modify your program to print `"FizzBuzz"`, +for numbers that are divisible by both 3 and 5 (and still print +`"Fizz"` or `"Buzz"` for numbers divisible by only one of those). + +(This is actually an ((interview question)) that has been claimed to +weed out a significant percentage of programmer candidates. So if you +solved it, you're now allowed to feel good about yourself.) + +{{if interactive +``` +// Your code here. +``` +if}} + +{{hint + +{{index "FizzBuzz (exercise)", "remainder operator", "% operator"}} + +Going +over the numbers is clearly a looping job, and selecting what to print +is a matter of conditional execution. Remember the trick of using the +remainder (`%`) operator for checking whether a number is divisible by +another number (has a remainder of zero). + +In the first version, there are three possible outcomes for every +number, so you'll have to create an `if`/`else if`/`else` chain. + +{{index "|| operator", ["if keyword", chaining]}} + +The second version of the +program has a straightforward solution and a clever one. The simple +way is to add another “branch” to precisely test the given condition. +For the clever method, build up a string containing the word or words +to output, and print either this word or the number if there is no +word, potentially by making elegant use of the `||` operator. 
+ +hint}} + +### Chess board + +{{index "chess board (exercise)", loop, [nesting, "of loops"], "newline character"}} + +Write a program that creates a string that represents an +8×8 grid, using newline characters to separate lines. At each position +of the grid there is either a space or a “#” character. The characters +should form a chess board. + +Passing this string to `console.log` should show something like this: + +```null + # # # # +# # # # + # # # # +# # # # + # # # # +# # # # + # # # # +# # # # +``` + +When you have a program that generates this pattern, define a +((variable)) `size = 8` and change the program so that it works for +any `size`, outputting a grid of the given width and height. + +{{if interactive +``` +// Your code here. +``` +if}} + +{{hint + +{{index "chess board (exercise)"}} + +The string can be built by starting with +an empty one (`""`) and repeatedly adding characters. A newline +character is written `"\n"`. + +Use `console.log` to inspect the output of your program. + +{{index [nesting, "of loops"]}} + +To work with two ((dimensions)), you will need a +((loop)) inside of a loop. Put ((curly braces)) around the bodies of +both loops to make it easy to see where they start and end. Try to +properly indent these bodies. The order of the loops must follow the +order in which we build up the string (line by line, left to right, +top to bottom). So the outer loop handles the lines and the inner loop +handles the characters on a line. + +{{index "counter variable", "remainder operator", "% operator"}} + +You'll +need two variables to track your progress. To know whether to put a +space or a hash sign at a given position, you could test whether the +sum of the two counters is even (`% 2`). + +Terminating a line by adding a newline character happens after the +line has been built up, so do this after the inner loop but inside of +the outer loop. 
+ +hint}} + diff --git a/03_functions.md b/03_functions.md new file mode 100644 index 000000000..fa46f6d1a --- /dev/null +++ b/03_functions.md @@ -0,0 +1,1140 @@ +{{meta {chap_num: 3, prev_link: 02_program_structure, next_link: 04_data}}} + +# Functions + +{{quote {author: "Donald Knuth", chapter: true} + +People think that computer science is the art of +geniuses but the actual reality is the opposite, just many people +doing things that build on each other, like a wall of mini stones. + +quote}} + +{{index "Knuth, Donald", function, "code structure"}} + +You've seen function values, such +as `alert`, and how to call them. Functions are the bread and butter +of JavaScript programming. The concept of wrapping a piece of program +in a value has many uses. It is a tool to structure larger programs, +to reduce repetition, to associate names with subprograms, and to +isolate these subprograms from each other. + +{{index "human language"}} + +The most obvious application of functions is +defining new ((vocabulary)). Creating new words in regular, +human-language prose is usually bad style. But in programming, it is +indispensable. + +{{index abstraction}} + +Typical adult English speakers have some 20,000 words +in their vocabulary. Few programming languages come with 20,000 +commands built in. And the vocabulary that _is_ available tends to be +more precisely defined, and thus less flexible, than in human +language. Therefore, we usually _have_ to add some of our own +vocabulary to avoid repeating ourselves too much. + +## Defining a function + +{{index square, [function, definition]}} + +A function definition is just a +regular ((variable)) definition where the value given to the variable +happens to be a function. 
For example, the following code defines the +variable `square` to refer to a function that produces the square of a +given number: + +``` +var square = function(x) { + return x * x; +}; + +console.log(square(12)); +// → 144 +``` + +{{index "curly braces", block, syntax, "function keyword", [function, body], [function, "as value"]}} + +{{indexsee braces, "curly braces"}} + +A function is +created by an expression that starts with the keyword `function`. +Functions have a set of _((parameter))s_ (in this case, only `x`) and +a _body_, which contains the statements that are to be executed when +the function is called. The function body must always be wrapped in +braces, even when it consists of only a single ((statement)) (as +in the previous example). + +{{index "power example"}} + +A function can have multiple parameters or no +parameters at all. In the following example, `makeNoise` does not list +any parameter names, whereas `power` lists two: + +``` +var makeNoise = function() { + console.log("Pling!"); +}; + +makeNoise(); +// → Pling! + +var power = function(base, exponent) { + var result = 1; + for (var count = 0; count < exponent; count++) + result *= base; + return result; +}; + +console.log(power(2, 10)); +// → 1024 +``` + +{{index "return value", "return keyword", undefined}} + +Some functions +produce a value, such as `power` and `square`, and some don't, such as +`makeNoise`, which produces only a ((side effect)). A `return` +statement determines the value the function returns. When control +comes across such a statement, it immediately jumps out of the current +function and gives the returned value to the code that called the +function. The `return` keyword without an expression after it will +cause the function to return `undefined`. 
+ +## Parameters and scopes + +{{index [function, application], [variable, "from parameter"]}} + +The +((parameter))s to a function behave like regular variables, but their +initial values are given by the _caller_ of the function, not the code +in the function itself. + +{{index [function, scope], scope, "local variable"}} + +An +important property of functions is that the variables created inside +of them, including their parameters, are _local_ to the function. This +means, for example, that the `result` variable in the `power` example +will be newly created every time the function is called, and these +separate incarnations do not interfere with each other. + +{{index "var keyword", "global scope", [variable, global]}} + +{{indexsee "top-level scope", "global scope"}} + +This +“localness” of variables applies only to the parameters and to variables +declared with the `var` keyword inside the function body. Variables +declared outside of any function are called _global_, because they are +visible throughout the program. It is possible to access such +variables from inside a function, as long as you haven't declared a +local variable with the same name. + +{{index [variable, assignment]}} + +The following code demonstrates this. It +defines and calls two functions that both assign a value to the +variable `x`. The first one declares the variable as local and thus +changes only the local variable. The second does not declare `x` +locally, so references to `x` inside of it refer to the global +variable `x` defined at the top of the example. + +``` +var x = "outside"; + +var f1 = function() { + var x = "inside f1"; +}; +f1(); +console.log(x); +// → outside + +var f2 = function() { + x = "inside f2"; +}; +f2(); +console.log(x); +// → inside f2 +``` + +{{index [variable, naming], scope, "global scope", [code, "structure of"]}} + +This behavior helps prevent accidental interference between +functions. 
If all variables were shared by the whole program, it'd +take a lot of effort to make sure no name is ever used for two +different purposes. And if you _did_ reuse a variable name, you might +see strange effects from unrelated code messing with the value of your +variable. By treating function-local variables as existing only within +the function, the language makes it possible to read and understand +functions as small universes, without having to worry about all the +code at once. + +{{id scoping}} +## Nested scope + +{{index [nesting, "of functions"], [nesting, "of scope"], scope, "inner function", "lexical scoping"}} + +JavaScript distinguishes not just between _global_ and +_local_ variables. Functions can be created inside other functions, +producing several degrees of locality. + +{{index "landscape example"}} + +For example, this rather nonsensical function +has two functions inside of it: + +``` +var landscape = function() { + var result = ""; + var flat = function(size) { + for (var count = 0; count < size; count++) + result += "_"; + }; + var mountain = function(size) { + result += "/"; + for (var count = 0; count < size; count++) + result += "'"; + result += "\\"; + }; + + flat(3); + mountain(4); + flat(6); + mountain(1); + flat(1); + return result; +}; + +console.log(landscape()); +// → __/''''\_____/'\_ +``` + +{{index [function, scope], scope}} + +The `flat` and `mountain` functions +can “see” the variable called `result`, since they are inside the +function that defines it. But they cannot see each other's `count` +variables since they are outside each other's scope. The environment +outside of the `landscape` function doesn't see any of the variables +defined inside `landscape`. + +In short, each local scope can also see all the local scopes that +contain it. The set of variables visible inside a function is +determined by the place of that function in the program text. 
All +variables from blocks _around_ a function's definition are +visible—meaning both those in function bodies that enclose it and +those at the top level of the program. This approach to variable +visibility is called _((lexical scoping))_. + +{{index "{} (block)"}} + +People who have experience with other programming +languages might expect that any block of code between braces produces +a new local environment. But in JavaScript, functions are the only +things that create a new scope. You are allowed to use free-standing +blocks. + +``` +var something = 1; +{ + var something = 2; + // Do stuff with variable something... +} +// Outside of the block again... +``` + +But the `something` inside the block refers to the same variable as +the one outside the block. In fact, although blocks like this are +allowed, they are useful only to group the body of an `if` statement +or a loop. + +{{index "let keyword", "ECMAScript 6"}} + +If you find this odd, you're not +alone. The next version of JavaScript will introduce a `let` keyword, +which works like `var` but creates a variable that is local to the +enclosing _block_, not the enclosing _function_. + +## Functions as values + +{{index [function, "as value"]}} + +Function ((variable))s usually simply act as +names for a specific piece of the program. Such a variable is defined +once and never changed. This makes it easy to start confusing the +function and its name. + +{{index [variable, assignment]}} + +But the two are different. A function value +can do all the things that other values can do—you can use it in +arbitrary ((expression))s, not just call it. It is possible to store a +function value in a new place, pass it as an argument to a function, +and so on. 
Similarly, a variable that holds a function is still just a +regular variable and can be assigned a new value, like so: + +{{test no}} + +``` +var launchMissiles = function(value) { + missileSystem.launch("now"); +}; +if (safeMode) + launchMissiles = function(value) {/* do nothing */}; +``` + +{{index [function, "higher-order"]}} + +In +[Chapter 5](05_higher_order.html#higher_order), we will discuss the +wonderful things that can be done by passing around function values to +other functions. + +## Declaration notation + +{{index syntax, "square example", "function keyword", [function, definition], [function, declaration]}} + +There is +a slightly shorter way to say “_var square = function…_”. The +`function` keyword can also be used at the start of a statement, as in +the following: + +``` +function square(x) { + return x * x; +} +``` + +{{index future, "execution order"}} + +This is a function _declaration_. The +statement defines the variable `square` and points it at the given +function. So far so good. There is one subtlety with this form of +function definition, however. + +``` +console.log("The future says:", future()); + +function future() { + return "We STILL have no flying cars."; +} +``` + +This code works, even though the function is defined _below_ the code +that uses it. This is because function declarations are not part of +the regular top-to-bottom flow of control. They are conceptually moved +to the top of their scope and can be used by all the code in that +scope. This is sometimes useful because it gives us the freedom to +order code in a way that seems meaningful, without worrying about +having to define all functions above their first use. + +{{index [function, declaration]}} + +What happens when you put such a function +definition inside a conditional (`if`) block or a loop? Well, don't do +that. 
Different JavaScript platforms in different browsers have +traditionally done different things in that situation, and the latest +((standard)) actually forbids it. If you want your programs to behave +consistently, only use this form of function-defining statements in +the outermost block of a function or program. + +``` +function example() { + function a() {} // Okay + if (something) { + function b() {} // Danger! + } +} +``` + +{{id stack}} +## The call stack + +{{index "call stack", [function, application]}} + +{{indexsee stack, "call stack"}} + +It will be helpful to take a +closer look at the way control flows through functions. Here is a +simple program that makes a few function calls: + +``` +function greet(who) { + console.log("Hello " + who); +} +greet("Harry"); +console.log("Bye"); +``` + +{{index "control flow", "execution order", "console.log"}} + +A run through +this program goes roughly like this: the call to `greet` causes +control to jump to the start of that function (line 2). It calls +`console.log` (a built-in browser function), which takes control, does +its job, and then returns control to line 2. Then it reaches the end +of the `greet` function, so it returns to the place that called it, at +line 4. The line after that calls `console.log` again. + +We could show the flow of control schematically like this: + +```null +top + greet + console.log + greet +top + console.log +top +``` + +{{index "return keyword", memory}} + +Because a function has to jump back to +the place of the call when it returns, the computer must remember the +context from which the function was called. In one case, `console.log` +has to jump back to the `greet` function. In the other case, it jumps +back to the end of the program. + +The place where the computer stores this context is the _((call +stack))_. Every time a function is called, the current context is put +on top of this “stack”. 
When the function returns, it removes the top +context from the stack and uses it to continue execution. + +{{index "infinite loop", "stack overflow", recursion}} + +Storing this +stack requires space in the computer's memory. When the stack grows +too big, the computer will fail with a message like “out of stack +space” or “too much recursion”. The following code illustrates this by +asking the computer a really hard question, which causes an infinite +back-and-forth between two functions. Rather, it _would_ be infinite, +if the computer had an infinite stack. As it is, we will run out of +space, or “blow the stack”. + +{{test no}} + +``` +function chicken() { + return egg(); +} +function egg() { + return chicken(); +} +console.log(chicken() + " came first."); +// → ?? +``` + +## Optional Arguments + +{{index argument, [function, application]}} + +The following code is allowed +and executes without any problem: + +``` +alert("Hello", "Good Evening", "How do you do?"); +``` + +{{index "alert function"}} + +The function `alert` officially accepts only one +argument. Yet when you call it like this, it doesn't complain. It +simply ignores the other arguments and shows you “Hello”. + +{{index undefined, parameter}} + +JavaScript is extremely broad-minded +about the number of arguments you pass to a function. If you pass too +many, the extra ones are ignored. If you pass too few, the missing +parameters simply get assigned the value `undefined`. + +The downside of this is that it is possible—likely, even—that you'll +accidentally pass the wrong number of arguments to functions and no +one will tell you about it. + +{{index "power example", "optional argument"}} + +{{id power}} +The +upside is that this behavior can be used to have a function take +“optional” arguments. For example, the following version of `power` +can be called either with two arguments or with a single argument, in +which case the exponent is assumed to be two, and the function behaves +like `square`. 
+ +{{test wrap}} + +``` +function power(base, exponent) { + if (exponent == undefined) + exponent = 2; + var result = 1; + for (var count = 0; count < exponent; count++) + result *= base; + return result; +} + +console.log(power(4)); +// → 16 +console.log(power(4, 3)); +// → 64 +``` + +{{index "console.log"}} + +In the [next +chapter](04_data.html#arguments_object), we will see a way in which a function body can get at the +exact list of arguments that were passed. This is helpful because it +makes it possible for a function to accept any number of arguments. +For example, `console.log` makes use of this—it outputs all of the +values it is given. + +``` +console.log("R", 2, "D", 2); +// → R 2 D 2 +``` + +## Closure + +{{index "call stack", "local variable", [function, "as value"], closure, scope}} + +The ability to treat functions as +values, combined with the fact that local variables are “re-created” +every time a function is called, brings up an interesting question. +What happens to local variables when the function call that created +them is no longer active? + +The following code shows an example of this. It defines a function, +`wrapValue`, which creates a local variable. It then returns a function +that accesses and returns this local variable. + +``` +function wrapValue(n) { + var localVariable = n; + return function() { return localVariable; }; +} + +var wrap1 = wrapValue(1); +var wrap2 = wrapValue(2); +console.log(wrap1()); +// → 1 +console.log(wrap2()); +// → 2 +``` + +This is allowed and works as you'd hope—the variable can still be +accessed. In fact, multiple instances of the variable can be alive at +the same time, which is another good illustration of the concept that +local variables really are re-created for every call—different calls +can't trample on one another's local variables. + +This feature—being able to reference a specific instance of local +variables in an enclosing function—is called _closure_.
A function +that “closes over” some local variables is called _a_ closure. This +behavior not only frees you from having to worry about lifetimes of +variables but also allows for some creative use of function values. + +{{index "multiplier function"}} + +With a slight change, we can turn the +previous example into a way to create functions that multiply by an +arbitrary amount. + +``` +function multiplier(factor) { + return function(number) { + return number * factor; + }; +} + +var twice = multiplier(2); +console.log(twice(5)); +// → 10 +``` + +{{index [variable, "from parameter"]}} + +The explicit `localVariable` from the +`wrapValue` example isn't needed since a parameter is itself a local +variable. + +{{index [function, "model of"]}} + +Thinking about programs like this takes some +practice. A good mental model is to think of the `function` keyword as +“freezing” the code in its body and wrapping it into a package (the +function value). So when you read `return function(...) {...}`, think +of it as returning a handle to a piece of computation, frozen for +later use. + +In the example, `multiplier` returns a frozen chunk of code that gets +stored in the `twice` variable. The last line then calls the value in +this variable, causing the frozen code (`return number * factor;`) to +be activated. It still has access to the `factor` variable from the +`multiplier` call that created it, and in addition it gets access to +the argument passed when unfreezing it, 5, through its `number` +parameter. + +## Recursion + +{{index "power example", "stack overflow", recursion, [function, application]}} + +It is perfectly +okay for a function to call itself, as long as it takes care not to +overflow the stack. A function that calls itself is called +_recursive_. Recursion allows some functions to be written in a +different style. 
Take, for example, this alternative implementation of +`power`: + +{{test wrap}} + +``` +function power(base, exponent) { + if (exponent == 0) + return 1; + else + return base * power(base, exponent - 1); +} + +console.log(power(2, 3)); +// → 8 +``` + +{{index loop, readability, mathematics}} + +This is rather +close to the way mathematicians define exponentiation and arguably +describes the concept in a more elegant way than the looping variant +does. The function calls itself multiple times with different +arguments to achieve the repeated multiplication. + +{{index [function, application], efficiency}} + +But this implementation has +one important problem: in typical JavaScript implementations, it's +about 10 times slower than the looping version. Running through a +simple loop is a lot cheaper than calling a function multiple times. + +{{index optimization}} + +The dilemma of speed versus ((elegance)) is an +interesting one. You can see it as a kind of continuum between +human-friendliness and machine-friendliness. Almost any program can be +made faster by making it bigger and more convoluted. The programmer +must decide on an appropriate balance. + +In the case of the [earlier](03_functions.html#power) `power` +function, the inelegant (looping) version is still fairly simple and +easy to read. It doesn't make much sense to replace it with the +recursive version. Often, though, a program deals with such complex +concepts that giving up some efficiency in order to make the program +more straightforward becomes an attractive choice. + +{{index profiling}} + +The basic rule, which has been repeated by many +programmers and with which I wholeheartedly agree, is to not worry +about efficiency until you know for sure that the program is too slow. +If it is, find out which parts are taking up the most time, and start +exchanging elegance for efficiency in those parts. + +Of course, this rule doesn't mean one should start ignoring +performance altogether. 
In many cases, like the `power` function, not +much simplicity is gained from the “elegant” approach. And sometimes +an experienced programmer can see right away that a simple approach is +never going to be fast enough. + +{{index "premature optimization"}} + +The reason I'm stressing this is that +surprisingly many beginning programmers focus fanatically on +efficiency, even in the smallest details. The result is bigger, more +complicated, and often less correct programs, that take longer to +write than their more straightforward equivalents and that usually run +only marginally faster. + +{{index "branching recursion"}} + +But recursion is not always just a +less-efficient alternative to looping. Some problems are much easier +to solve with recursion than with loops. Most often these are problems +that require exploring or processing several “branches”, each of which +might branch out again into more branches. + +{{id recursive_puzzle}} + +{{index recursion, "number puzzle example"}} + +Consider this puzzle: by +starting from the number 1 and repeatedly either adding 5 or +multiplying by 3, an infinite amount of new numbers can be produced. +How would you write a function that, given a number, tries to find a +sequence of such additions and multiplications that produce that +number? For example, the number 13 could be reached by first +multiplying by 3 and then adding 5 twice, whereas the number 15 cannot +be reached at all. + +Here is a recursive solution: + +``` +function findSolution(target) { + function find(current, history) { + if (current == target) + return history; + else if (current > target) + return null; + else + return find(current + 5, "(" + history + " + 5)") || + find(current * 3, "(" + history + " * 3)"); + } + return find(1, "1"); +} + +console.log(findSolution(24)); +// → (((1 * 3) + 5) * 3) +``` + +Note that this program doesn't necessarily find the _shortest_ +sequence of operations. It is satisfied when it finds any sequence at +all. 
+ +I don't necessarily expect you to see how it works right away. But +let's work through it, since it makes for a great exercise in +recursive thinking. + +The inner function `find` does the actual recursing. It takes two +((argument))s—the current number and a string that records how we +reached this number—and returns either a string that shows how to get +to the target or `null`. + +{{index null, "|| operator", "short-circuit evaluation"}} + +To do this, the +function performs one of three actions. If the current number is the +target number, the current history is a way to reach that target, so +it is simply returned. If the current number is greater than the +target, there's no sense in further exploring this history since both +adding and multiplying will only make the number bigger. And finally, +if we're still below the target, the function tries both possible +paths that start from the current number, by calling itself twice, +once for each of the allowed next steps. If the first call returns +something that is not `null`, it is returned. Otherwise, the second +call is returned—regardless of whether it produces a string or `null`. + +{{index "call stack"}} + +To better understand how this function produces the +effect we're looking for, let's look at all the calls to `find` that +are made when searching for a solution for the number 13. + +```null +find(1, "1") + find(6, "(1 + 5)") + find(11, "((1 + 5) + 5)") + find(16, "(((1 + 5) + 5) + 5)") + too big + find(33, "(((1 + 5) + 5) * 3)") + too big + find(18, "((1 + 5) * 3)") + too big + find(3, "(1 * 3)") + find(8, "((1 * 3) + 5)") + find(13, "(((1 * 3) + 5) + 5)") + found! +``` + +The indentation suggests the depth of the call stack. The first time +`find` is called it calls itself twice to explore the solutions that start with +`(1 + 5)` and `(1 * 3)`. 
The first call tries to find a solution that +starts with `(1 + 5)` and, using recursion, explores _every_ solution +that yields a number less than or equal to the target number. Since +it doesn't find a solution that hits the target, it returns `null` +back to the first call. There the `||` operator causes the call that +explores `(1 * 3)` to happen. This search has more luck because its +first recursive call, through yet _another_ recursive call, hits upon +the target number, 13. This innermost recursive call returns a string, +and each of the `||` operators in the intermediate calls passes that +string along, ultimately returning our solution. + +## Growing functions + +{{index [function, definition]}} + +There are two more or less natural ways for +functions to be introduced into programs. + +{{index repetition}} + +The first is that you find yourself writing very +similar code multiple times. We want to avoid doing that since having +more code means more space for mistakes to hide and more material to +read for people trying to understand the program. So we take the +repeated functionality, find a good name for it, and put it into a +function. + +The second way is that you find you need some functionality that you +haven't written yet and that sounds like it deserves its own function. +You'll start by naming the function, and you'll then write its body. +You might even start writing code that uses the function before you +actually define the function itself. + +{{index [function, naming], [variable, naming]}} + +How difficult it is to find +a good name for a function is a good indication of how clear a concept +it is that you're trying to wrap. Let's go through an example. + +{{index "farm example"}} + +We want to write a program that prints two numbers, +the numbers of cows and chickens on a farm, with the words `Cows` and +`Chickens` after them, and zeros padded before both numbers so that +they are always three digits long.
+ +```null +007 Cows +011 Chickens +``` + +That clearly asks for a function of two arguments. Let's get coding. + +``` +function printFarmInventory(cows, chickens) { + var cowString = String(cows); + while (cowString.length < 3) + cowString = "0" + cowString; + console.log(cowString + " Cows"); + var chickenString = String(chickens); + while (chickenString.length < 3) + chickenString = "0" + chickenString; + console.log(chickenString + " Chickens"); +} +printFarmInventory(7, 11); +``` + +{{index ["length property", "for string"], "while loop"}} + +Adding `.length` +after a string value will give us the length of that string. Thus, the +`while` loops keep adding zeros in front of the number strings until +they are at least three characters long. + +Mission accomplished! But just as we are about to send the farmer the +code (along with a hefty invoice, of course), he calls and tells us +he's also started keeping pigs, and couldn't we please extend the +software to also print pigs? + +{{index "copy-paste programming"}} + +We sure can. But just as we're in the +process of copying and pasting those four lines one more time, we stop +and reconsider. There has to be a better way. Here's a first attempt: + +``` +function printZeroPaddedWithLabel(number, label) { + var numberString = String(number); + while (numberString.length < 3) + numberString = "0" + numberString; + console.log(numberString + " " + label); +} + +function printFarmInventory(cows, chickens, pigs) { + printZeroPaddedWithLabel(cows, "Cows"); + printZeroPaddedWithLabel(chickens, "Chickens"); + printZeroPaddedWithLabel(pigs, "Pigs"); +} + +printFarmInventory(7, 11, 3); +``` + +{{index [function, naming]}} + +It works! But that name, +`printZeroPaddedWithLabel`, is a little awkward. It conflates three +things—printing, zero-padding, and adding a label—into a single +function. 
+ +{{index "zeroPad function"}} + +Instead of lifting out the repeated part of our +program wholesale, let's try to pick out a single _concept_. + +``` +function zeroPad(number, width) { + var string = String(number); + while (string.length < width) + string = "0" + string; + return string; +} + +function printFarmInventory(cows, chickens, pigs) { + console.log(zeroPad(cows, 3) + " Cows"); + console.log(zeroPad(chickens, 3) + " Chickens"); + console.log(zeroPad(pigs, 3) + " Pigs"); +} + +printFarmInventory(7, 16, 3); +``` + +{{index readability, "pure function"}} + +A function with a nice, obvious +name like `zeroPad` makes it easier for someone who reads the code to +figure out what it does. And it is useful in more situations than just +this specific program. For example, you could use it to help print +nicely aligned tables of numbers. + +{{index [interface, design]}} + +How smart and versatile should our function be? +We could write anything from a terribly simple function that simply +pads a number so that it's three characters wide to a complicated +generalized number-formatting system that handles fractional numbers, +negative numbers, alignment of dots, padding with different +characters, and so on. + +A useful principle is not to add cleverness unless you are absolutely +sure you're going to need it. It can be tempting to write general +“((framework))s” for every little bit of functionality you come +across. Resist that urge. You won't get any real work done, and you'll +end up writing a lot of code that no one will ever use. + +{{id pure}} +## Functions and side effects + +{{index "side effect", "pure function", [function, purity]}} + +Functions can +be roughly divided into those that are called for their side effects +and those that are called for their return value. (Though it is +definitely also possible to have both side effects and return a +value.) 
+ +{{index reuse}} + +The first helper function in the ((farm example)), +`printZeroPaddedWithLabel`, is called for its side effect: it prints a +line. The second version, `zeroPad`, is called for its return value. +It is no coincidence that the second is useful in more situations than +the first. Functions that create values are easier to combine in new +ways than functions that directly perform side effects. + +{{index substitution}} + +A _pure_ function is a specific kind of +value-producing function that not only has no side effects but also +doesn't rely on side effects from other code—for example, it doesn't +read global variables that are occasionally changed by other code. A +pure function has the pleasant property that, when called with the +same arguments, it always produces the same value (and doesn't do +anything else). This makes it easy to reason about. A call to such a +function can be mentally substituted by its result, without changing +the meaning of the code. When you are not sure that a pure function is +working correctly, you can test it by simply calling it, and know that +if it works in that context, it will work in any context. Nonpure +functions might return different values based on all kinds of factors +and have side effects that might be hard to test and think about. + +{{index optimization, "console.log"}} + +Still, there's no need to feel bad +when writing functions that are not pure or to wage a holy war to +purge them from your code. Side effects are often useful. There'd be +no way to write a pure version of `console.log`, for example, and +`console.log` is certainly useful. Some operations are also easier to +express in an efficient way when we use side effects, so computing +speed can be a reason to avoid purity. + +## Summary + +This chapter taught you how to write your own functions. The +`function` keyword, when used as an expression, can create a function +value. 
When used as a statement, it can be used to declare a variable +and give it a function as its value. + +``` +// Create a function value f +var f = function(a) { + console.log(a + 2); +}; + +// Declare g to be a function +function g(a, b) { + return a * b * 3.5; +} +``` + +A key aspect in understanding functions is understanding local scopes. +Parameters and variables declared inside a function are local to the +function, re-created every time the function is called, and not visible +from the outside. Functions declared inside another function have +access to the outer function's local scope. + +Separating the tasks your program performs into different +functions is helpful. You won't have to repeat yourself as much, and +functions can make a program more readable by grouping code into +conceptual chunks, in the same way that chapters and sections help +organize regular text. + +## Exercises + +### Minimum + +{{index "Math object", "minimum (exercise)", "Math.min function", minimum}} + +The +[previous chapter](02_program_structure.html#return_values) +introduced the standard function `Math.min` that returns its smallest +argument. We can do that ourselves now. Write a function `min` that +takes two arguments and returns their minimum. + +{{if interactive + +{{test no}} + +``` +// Your code here. + +console.log(min(0, 10)); +// → 0 +console.log(min(0, -10)); +// → -10 +``` +if}} + +{{hint + +{{index "minimum (exercise)"}} + +If you have trouble putting braces and +parentheses in the right place to get a valid function definition, +start by copying one of the examples in this chapter and modifying it. + +{{index "return keyword"}} + +A function may contain multiple `return` +statements. + +hint}} + +### Recursion + +{{index recursion, "isEven (exercise)", "even number"}} + +We've seen +that `%` (the remainder operator) can be used to test whether a number +is even or odd by using `% 2` to check whether it's divisible by two. 
+Here's another way to define whether a positive whole number is even +or odd: + +- Zero is even. + +- One is odd. + +- For any other number _N_, its evenness is the same as _N_ - 2. + +Define a recursive function `isEven` corresponding to this +description. The function should accept a `number` parameter and +return a Boolean. + +{{index "stack overflow"}} + +Test it on 50 and 75. See how it behaves on -1. +Why? Can you think of a way to fix this? + +{{if interactive + +{{test no}} + +``` +// Your code here. + +console.log(isEven(50)); +// → true +console.log(isEven(75)); +// → false +console.log(isEven(-1)); +// → ?? +``` +if}} + +{{hint + +{{index "isEven (exercise)", ["if keyword", chaining], recursion}} + +Your +function will likely look somewhat similar to the inner `find` +function in the recursive `findSolution` +[example](03_functions.html#recursive_puzzle) in this chapter, with +an `if`/`else if`/`else` chain that tests which of the three cases +applies. The final `else`, corresponding to the third case, makes the +recursive call. Each of the branches should contain a `return` +statement or in some other way arrange for a specific value to be +returned. + +{{index "stack overflow"}} + +When given a negative number, the function will +recurse again and again, passing itself an ever more negative number, +thus getting further and further away from returning a result. It will +eventually run out of stack space and abort. + +hint}} + +### Bean counting + +{{index "bean counting (exercise)", "charAt method", [string, indexing], "zero-based counting"}} + +You can get the +Nth character, or letter, from a string by writing +`"string".charAt(N)`, similar to how you get its length with +`"s".length`. The returned value will be a string containing only one +character (for example, `"b"`). The first character has position zero, +which causes the last one to be found at position `string.length - 1`. 
+In other words, a two-character string has length 2, and its +characters have positions 0 and 1. + +Write a function `countBs` that takes a string as its only argument +and returns a number that indicates how many uppercase “B” characters +are in the string. + +Next, write a function called `countChar` that behaves like `countBs`, +except it takes a second argument that indicates the character that is +to be counted (rather than counting only uppercase “B” characters). +Rewrite `countBs` to make use of this new function. + +{{if interactive + +{{test no}} + +``` +// Your code here. + +console.log(countBs("BBC")); +// → 2 +console.log(countChar("kakkerlak", "k")); +// → 4 +``` +if}} + +{{hint + +{{index "bean counting (exercise)", ["length property", "for string"], "counter variable"}} + +A ((loop)) in your function will have +to look at every character in the string by running an index from zero +to one below its length (`< string.length`). If the character at the +current position is the same as the one the function is looking for, +it adds 1 to a counter variable. Once the loop has finished, the +counter can be returned. + +{{index "local variable"}} + +Take care to make all the variables used in the +function _local_ to the function by using the `var` keyword. + +hint}} + diff --git a/04_data.md b/04_data.md new file mode 100644 index 000000000..60491c80d --- /dev/null +++ b/04_data.md @@ -0,0 +1,1585 @@ +{{meta {chap_num: 4, prev_link: 03_functions, next_link: 05_higher_order, load_files: ["code/jacques_journal.js", "code/chapter/04_data.js"], zip: "node/html"}}} + +# Data Structures: Objects and Arrays + +{{quote {author: "Charles Babbage", title: "Passages from the Life of a Philosopher (1864)", chapter: true} + +On two occasions I have been asked, ‘Pray, +Mr. Babbage, if you put into the machine wrong figures, will the right +answers come out?’ [...] I am not able rightly to apprehend the kind +of confusion of ideas that could provoke such a question. 
+ +quote}} + +{{index "Babbage, Charles", object, "data structure"}} + +Numbers, Booleans, and strings are the +bricks that ((data)) structures are built from. But you can't make +much of a house out of a single brick. _Objects_ allow us to group +values—including other objects—together and thus build more complex +structures. + +The programs we have built so far have been seriously hampered by the +fact that they were operating only on simple data types. This chapter +will add a basic understanding of data structures to your toolkit. By +the end of it, you'll know enough to start writing some useful +programs. + +The chapter will work through a more or less realistic programming +example, introducing concepts as they apply to the problem at hand. +The example code will often build on functions and variables that were +introduced earlier in the text. + +{{if book + +{{index sandbox}} + +The online coding sandbox for the book +([_eloquentjavascript.net/code_](http://eloquentjavascript.net/code)) +provides a way to run code in the context of a specific chapter. If +you decide to work through the examples in another environment, be +sure to first download the full code for this chapter from the +sandbox page. + +if}} + +## The weresquirrel + +{{index "weresquirrel example", lycanthropy}} + +Every now and then, usually +between eight and ten in the evening, ((Jacques)) finds himself +transforming into a small furry rodent with a bushy tail. + +On one hand, Jacques is quite glad that he doesn't have classic +lycanthropy. Turning into a squirrel tends to cause fewer problems +than turning into a wolf. Instead of having to worry about +accidentally eating the neighbor (_that_ would be awkward), he worries +about being eaten by the neighbor's cat.
After two occasions where he +woke up on a precariously thin branch in the crown of an oak, naked +and disoriented, he has taken to locking the doors and windows of his +room at night and putting a few walnuts on the floor to keep himself +busy. + +{{figure {url: "img/weresquirrel.png", alt: "The weresquirrel"}}} + +That takes care of the cat and oak problems. But Jacques still suffers +from his condition. The irregular occurrences of the transformation +make him suspect that they might be triggered by something. +For a while, he believed that it happened only on days when he +had touched trees. So he stopped touching trees entirely and even +avoided going near them. But the problem persisted. + +{{index journal}} + +Switching to a more scientific approach, Jacques intends +to start keeping a daily log of everything he did that day and whether +he changed form. With this data he hopes to narrow down the conditions +that trigger the transformations. + +The first thing he does is design a data structure to store this +information. + +## Data sets + +{{index "data structure"}} + +To work with a chunk of digital data, we'll first +have to find a way to represent it in our machine's ((memory)). Say, +as a simple example, that we want to represent a ((collection)) of +numbers: 2, 3, 5, 7, and 11. + +{{index string}} + +We could get creative with strings—after all, strings +can be any length, so we can put a lot of data into them—and use `"2 3 +5 7 11"` as our representation. But this is awkward. You'd have to +somehow extract the digits and convert them back to numbers to access +them. + +{{index [array, creation], "[] (array)"}} + +Fortunately, JavaScript +provides a data type specifically for storing sequences of values. It +is called an _array_ and is written as a list of values between +((square brackets)), separated by commas. 
+ +``` +var listOfNumbers = [2, 3, 5, 7, 11]; +console.log(listOfNumbers[2]); +// → 5 +console.log(listOfNumbers[2 - 1]); +// → 3 +``` + +{{index "[] (subscript)", [array, indexing]}} + +The notation for getting +at the elements inside an array also uses ((square brackets)). A pair +of square brackets immediately after an expression, with another +expression inside of them, will look up the element in the left-hand +expression that corresponds to the _((index))_ given by the expression +in the brackets. + +{{id array_indexing}} +The first index of an array is zero, not one. So the first element can +be read with `listOfNumbers[0]`. If you don't have a programming +background, this convention might take some getting used to. But +((zero-based counting)) has a long tradition in technology, and as +long as this convention is followed consistently (which it is, in +JavaScript), it works well. + +{{id properties}} +## Properties + +{{index "Math object", "Math.max function", ["length property", "for string"], [object, property], "period character"}} + +We've seen a few +suspicious-looking expressions like `myString.length` (to get the +length of a string) and `Math.max` (the maximum function) in past +examples. These are expressions that access a _((property))_ of some +value. In the first case, we access the `length` property of the value +in `myString`. In the second, we access the property named `max` in +the `Math` object (which is a collection of mathematics-related values +and functions). + +{{index property, null, undefined}} + +Almost all JavaScript values +have properties. The exceptions are `null` and `undefined`. If you try +to access a property on one of these nonvalues, you get an error. 
+ +{{test no}} + +``` +null.length; +// → TypeError: Cannot read property 'length' of null +``` + +{{index "[] (subscript)", "period character", "square brackets", "computed property"}} + +{{indexsee "dot character", "period character"}} + +The two most common ways to access +properties in JavaScript are with a dot and with square brackets. Both +`value.x` and `value[x]` access a ((property)) on _value_—but not +necessarily the same property. The difference is in how `x` is +interpreted. When using a dot, the part after the dot must be a valid +variable name, and it directly names the property. When using square +brackets, the expression between the brackets is _evaluated_ to get +the property name. Whereas `value.x` fetches the property of `value` +named “x”, `value[x]` tries to evaluate the expression `x` and uses +the result as the property name. + +So if you know that the property you are interested in is called +“length”, you say `value.length`. If you want to extract the property +named by the value held in the variable `i`, you say `value[i]`. And +because property names can be any string, if you want to access a +property named “2” or “John Doe”, you must use square brackets: +`value[2]` or `value["John Doe"]`. This is the case even though you +know the precise name of the property in advance, because neither “2” +nor “John Doe” is a valid variable name and so cannot be accessed +through dot notation. + +{{index array, ["length property", "for array"], [array, "length of"]}} + +The elements in an array are stored in properties. Because the +names of these properties are numbers and we often need to get their +name from a variable, we have to use the bracket syntax to access +them. The `length` property of an array tells us how many elements it +contains. This property name is a valid variable name, and we know its +name in advance, so to find the length of an array, you typically +write `array.length` because that is easier to write than +`array["length"]`. 
+ +{{id methods}} +## Methods + +{{index [function, "as property"], method, string}} + +Both string and +array objects contain, in addition to the `length` property, a number +of properties that refer to function values. + +``` +var doh = "Doh"; +console.log(typeof doh.toUpperCase); +// → function +console.log(doh.toUpperCase()); +// → DOH +``` + +{{index "case conversion", "toUpperCase method", "toLowerCase method"}} + +Every string has a `toUpperCase` property. When called, it +will return a copy of the string, in which all letters have been +converted to uppercase. There is also `toLowerCase`. You can guess +what that does. + +{{index this}} + +Interestingly, even though the call to `toUpperCase` does +not pass any arguments, the function somehow has access to the string +`"Doh"`, the value whose property we called. How this works is +described in [Chapter 6](06_object.html#obj_methods). + +Properties that contain functions are generally called _methods_ of +the value they belong to. As in, “_toUpperCase_ is a method of a +string”. + +{{index collection, array, string, "push method", "pop method", "join method"}} + +{{id array_methods}} +This example demonstrates +some methods that array objects have: + +``` +var mack = []; +mack.push("Mack"); +mack.push("the", "Knife"); +console.log(mack); +// → ["Mack", "the", "Knife"] +console.log(mack.join(" ")); +// → Mack the Knife +console.log(mack.pop()); +// → Knife +console.log(mack); +// → ["Mack", "the"] +``` + +The `push` method can be used to add values to the end of an array. +The `pop` method does the opposite: it removes the value at the end of +the array and returns it. An array of strings can be flattened to a +single string with the `join` method. The argument given to `join` +determines the text that is glued between the array's elements. + +## Objects + +{{index journal, "weresquirrel example", array, record}} + +Back to the weresquirrel. A set of daily log +entries can be represented as an array. 
But the entries do not consist +of just a number or a string—each entry needs to store a list of +activities and a Boolean value that indicates whether Jacques turned +into a squirrel. Ideally, we would like to group these values together +into a single value and then put these grouped values into an array of +log entries. + +{{index syntax, object, property, "curly braces", "{} (object)"}} + +Values of the type _object_ are arbitrary collections of +properties, and we can add or remove these properties as we please. +One way to create an object is by using a curly brace notation. + +``` +var day1 = { + squirrel: false, + events: ["work", "touched tree", "pizza", "running", + "television"] +}; +console.log(day1.squirrel); +// → false +console.log(day1.wolf); +// → undefined +day1.wolf = false; +console.log(day1.wolf); +// → false +``` + +{{index [quoting, "of object properties"], "colon character"}} + +Inside the +curly braces, we can give a list of properties separated by commas. +Each property is written as a name, followed by a colon, followed by +an expression that provides a value for the property. Spaces and line +breaks are not significant. When an object spans multiple lines, +indenting it like in the previous example improves readability. +Properties whose names are not valid variable names or valid numbers +have to be quoted. + +``` +var descriptions = { + work: "Went to work", + "touched tree": "Touched a tree" +}; +``` + +This means that ((curly braces)) have _two_ meanings in JavaScript. At +the start of a statement, they start a block of statements. In any +other position, they describe an object. Fortunately, it is almost +never useful to start a statement with a curly-brace object, and in +typical programs, there is no ambiguity between these two uses. + +{{index undefined}} + +Reading a property that doesn't exist will produce the +value `undefined`, which happens the first time we try to read the `wolf` +property in the previous example. 
+ +{{index [property, assignment], mutability, "= operator"}} + +It is +possible to assign a value to a property expression with the `=` +operator. This will replace the property's value if it already existed +or create a new property on the object if it didn't. + +{{index "tentacle (analogy)", [property, "model of"]}} + +To briefly return to +our tentacle model of ((variable)) bindings—property bindings are +similar. They _grasp_ values, but other variables and properties might +be holding onto those same values. You may think of objects as +octopuses with any number of tentacles, each of which has a name +inscribed on it. + +{{figure {url: "img/octopus-object.jpg", alt: "Artist's representation of an object"}}} + +{{index "delete operator", [property, deletion]}} + +The `delete` operator cuts +off a tentacle from such an octopus. It is a unary operator that, when +applied to a property access expression, will remove the named +property from the object. This is not a common thing to do, but it is +possible. + +``` +var anObject = {left: 1, right: 2}; +console.log(anObject.left); +// → 1 +delete anObject.left; +console.log(anObject.left); +// → undefined +console.log("left" in anObject); +// → false +console.log("right" in anObject); +// → true +``` + +{{index "in operator", [property, "testing for"], object}} + +The binary +`in` operator, when applied to a string and an object, returns a +Boolean value that indicates whether that object has that property. +The difference between setting a property to `undefined` and actually +deleting it is that, in the first case, the object still _has_ the +property (it just doesn't have a very interesting value), whereas in +the second case the property is no longer present and `in` will return +`false`. + +{{index array, collection}} + +Arrays, then, are just a kind of +object specialized for storing sequences of things. If you evaluate +`typeof [1, 2]`, this produces `"object"`. 
You can see them as long, +flat octopuses with all their arms in a neat row, labeled with +numbers. + +{{figure {url: "img/octopus-array.jpg", alt: "Artist's representation of an array"}}} + +{{index journal, "weresquirrel example"}} + +So we can represent Jacques’ +journal as an array of objects. + +``` +var journal = [ + {events: ["work", "touched tree", "pizza", + "running", "television"], + squirrel: false}, + {events: ["work", "ice cream", "cauliflower", + "lasagna", "touched tree", "brushed teeth"], + squirrel: false}, + {events: ["weekend", "cycling", "break", + "peanuts", "beer"], + squirrel: true}, + /* and so on... */ +]; +``` + +## Mutability + +We will get to actual programming _real_ soon now. But first, there's +one last piece of theory to understand. + +{{index mutability, "side effect", number, string, Boolean, object}} + +We've seen that object +values can be modified. The types of values discussed in earlier +chapters, such as numbers, strings, and Booleans, are all +_immutable_—it is impossible to change an existing value of those +types. You can combine them and derive new values from them, but when +you take a specific string value, that value will always remain the +same. The text inside it cannot be changed. If you have a reference to a +string that contains `"cat"`, it is not possible for other code to +change a character in _that_ string to make it spell `"rat"`. + +With objects, on the other hand, the content of a value _can_ be +modified by changing its properties. + +{{index [object, identity], identity, memory}} + +When we have two +numbers, 120 and 120, we can consider them precisely the same number, +whether or not they refer to the same physical bits. But with objects, +there is a difference between having two references to the same object +and having two different objects that contain the same properties.
+Consider the following code: + +``` +var object1 = {value: 10}; +var object2 = object1; +var object3 = {value: 10}; + +console.log(object1 == object2); +// → true +console.log(object1 == object3); +// → false + +object1.value = 15; +console.log(object2.value); +// → 15 +console.log(object3.value); +// → 10 +``` + +{{index "tentacle (analogy)", [variable, "model of"]}} + +The `object1` and +`object2` variables grasp the _same_ object, which is why changing +`object1` also changes the value of `object2`. The variable `object3` +points to a different object, which initially contains the same +properties as `object1` but lives a separate life. + +{{index "== operator", [comparison, "of objects"], "deep comparison"}} + +JavaScript's `==` operator, when comparing objects, will +return `true` only if both objects are precisely the same value. +Comparing different objects will return `false`, even if they have +identical contents. There is no “deep” comparison operation built into +JavaScript, which looks at object's contents, but it is possible to +write it yourself (which will be one of the +[exercises](04_data.html#exercise_deep_compare) at the end of this +chapter). + +## The lycanthrope's log + +{{index "weresquirrel example", lycanthropy, "addEntry function"}} + +So +Jacques starts up his JavaScript interpreter and sets up the +environment he needs to keep his ((journal)). + +// include_code + +``` +var journal = []; + +function addEntry(events, didITurnIntoASquirrel) { + journal.push({ + events: events, + squirrel: didITurnIntoASquirrel + }); +} +``` + +And then, every evening at ten—or sometimes the next morning, after +climbing down from the top shelf of his bookcase—he records the day. 
+ +``` +addEntry(["work", "touched tree", "pizza", "running", + "television"], false); +addEntry(["work", "ice cream", "cauliflower", "lasagna", + "touched tree", "brushed teeth"], false); +addEntry(["weekend", "cycling", "break", "peanuts", + "beer"], true); +``` + +Once he has enough data points, he intends to compute the +((correlation)) between his squirrelification and each of the day's +events and ideally learn something useful from those correlations. + +{{index correlation}} + +_Correlation_ is a measure of ((dependence)) between +((variable))s (“variables” in the statistical sense, not the +JavaScript sense). It is usually expressed as a coefficient that +ranges from -1 to 1. Zero correlation means the variables are not +related, whereas a correlation of one indicates that the two are +perfectly related—if you know one, you also know the other. Negative +one also means that the variables are perfectly related but that they +are opposites—when one is true, the other is false. + +{{index "phi coefficient"}} + +For binary (Boolean) variables, the _phi_ +coefficient (_ϕ_) provides a good measure of correlation and is +relatively easy to compute. To compute _ϕ_, we need a ((table)) _n_ +that contains the number of times the various combinations of the two +variables were observed. For example, we could take the event of +eating ((pizza)) and put that in a table like this: + +{{figure {url: "img/pizza-squirrel.svg", alt: "Eating pizza versus turning into a squirrel",width: "7cm"}}} + +_ϕ_ can be computed using the following formula, where _n_ refers to the table: + +{{if html + +```null +
    + + + + +
    ϕ = \frac{n_{11}n_{00} - n_{10}n_{01}}{\sqrt{n_{1\bullet}n_{0\bullet}n_{\bullet1}n_{\bullet0}}} +
    +``` + +if}} + +{{if tex + +pass:[\begin{equation}\varphi = \frac{n_{11}n_{00}-n_{10}n_{01}}{\sqrt{n_{1\bullet}n_{0\bullet}n_{\bullet1}n_{\bullet0}}}\end{equation}] + +if}} + +The notation (!html _n_~01~!)(!tex pass:[$n_{01}$]!) indicates the +number of measurements where the first variable (squirrelness) is false +(0) and the second variable (pizza) is true (1). In this +example, (!html _n_~01~!)(!tex pass:[$n_{01}$]!) is 9. + +The value (!html _n_~1•~!)(!tex pass:[$n_{1\bullet}$]!) refers to the +sum of all measurements where the first variable is true, which is 5 +in the example table. Likewise, (!html _n_~•0~!)(!tex pass:[$n_{\bullet0}$]!) +refers to the sum of the measurements where the second variable is false. + +{{index correlation, "phi coefficient"}} + +So for the pizza table, the part +above the division line (the dividend) would be 1×76 - 4×9 = 40, and +the part below it (the divisor) would be the square root of +5×85×10×80, or (!html √340000!)(!tex pass:[$\sqrt{340000}$]!). This +comes out to _ϕ_ ≈ 0.069, which is tiny. Eating ((pizza)) does not +appear to have influence on the transformations. + +## Computing correlation + +{{index [array, "as table"], [nesting, "of arrays"]}} + +We can represent a +two-by-two ((table)) in JavaScript with a four-element array (`[76, 9, +4, 1]`). We could also use other representations, such as an array +containing two two-element arrays (`[[76, 9], [4, 1]]`) or an object +with property names like `"11"` and `"01"`, but the flat array is +simple and makes the expressions that access the table pleasantly +short. We'll interpret the indices to the array as two-((bit)) +((binary number)), where the leftmost (most significant) digit refers +to the squirrel variable and the rightmost (least significant) digit +refers to the event variable. For example, the binary number `10` +refers to the case where Jacques did turn into a squirrel, but the +event (say, "pizza") didn't occur. This happened four times. 
And since +binary `10` is 2 in decimal notation, we will store this number at +index 2 of the array. + +{{index "phi coefficient", "phi function"}} + +This is the function that +computes the _ϕ_ coefficient from such an array: + +{{test clip}} +{{includeCode "strip_log"}} + +``` +function phi(table) { + return (table[3] * table[0] - table[2] * table[1]) / + Math.sqrt((table[2] + table[3]) * + (table[0] + table[1]) * + (table[1] + table[3]) * + (table[0] + table[2])); +} + +console.log(phi([76, 9, 4, 1])); +// → 0.068599434 +``` + +{{index "square root", "Math.sqrt function"}} + +This is simply a direct +translation of the _ϕ_ formula into JavaScript. `Math.sqrt` is the +square root function, as provided by the `Math` object in a standard +JavaScript environment. We have to sum two fields from the table to +get fields like (!html n~1•~!)(!tex pass:[$n_{1\bullet}$]!) because +the sums of rows or columns are not stored directly in our data +structure. + +{{index "JOURNAL data set"}} + +Jacques kept his journal for three months. The +resulting ((data set)) is available in the coding sandbox for this +chapter(!book (http://eloquentjavascript.net/code#4[_eloquentjavascript.net/code#4_])!), +where it is stored in the `JOURNAL` variable, and in a downloadable +http://eloquentjavascript.net/code/jacques_journal.js[file]. + +{{index "tableFor function", "hasEvent function"}} + +To extract a two-by-two +((table)) for a specific event from this journal, we must loop over +all the entries and tally up how many times the event occurs in +relation to squirrel transformations. 
+ +{{includeCode "strip_log"}} + +``` +function hasEvent(event, entry) { + return entry.events.indexOf(event) != -1; +} + +function tableFor(event, journal) { + var table = [0, 0, 0, 0]; + for (var i = 0; i < journal.length; i++) { + var entry = journal[i], index = 0; + if (hasEvent(event, entry)) index += 1; + if (entry.squirrel) index += 2; + table[index] += 1; + } + return table; +} + +console.log(tableFor("pizza", JOURNAL)); +// → [76, 9, 4, 1] +``` + +{{index [array, searching], "indexOf method"}} + +The `hasEvent` function tests +whether an entry contains a given event. Arrays have an `indexOf` +method that tries to find a given value (in this case, the event name) +in the array and returns the index at which it was found or -1 if it +wasn't found. So if the call to `indexOf` doesn't return -1, then we +know the event was found in the entry. + +{{index [array, indexing]}} + +The body of the loop in `tableFor` figures +out which box in the table each journal entry falls into by checking +whether the entry contains the specific event it's interested in and +whether the event happens alongside a squirrel incident. The loop then +adds one to the number in the array that corresponds to this box on +the table. + +We now have the tools we need to compute individual ((correlation))s. +The only step remaining is to find a correlation for every type of +event that was recorded and see whether anything stands out. But how +should we store these correlations once we compute them? + +## Objects as maps + +{{index "weresquirrel example", array}} + +One possible way is to store +all the ((correlation))s in an array, using objects with `name` and +`value` properties. But that makes looking up the correlation for a +given event somewhat cumbersome: you'd have to loop over the whole +array to find the object with the right `name`. 
We could wrap this +lookup process in a function, but we would still be writing more code, +and the computer would be doing more work than necessary. + +{{index object, "square brackets", [object, "as map"], "in operator"}} + +{{id object_map}} +A better way is to use object properties named after the +event types. We can use the square bracket access notation to create +and read the properties and can use the `in` operator to test whether +a given property exists. + +``` +var map = {}; +function storePhi(event, phi) { + map[event] = phi; +} + +storePhi("pizza", 0.069); +storePhi("touched tree", -0.081); +console.log("pizza" in map); +// → true +console.log(map["touched tree"]); +// → -0.081 +``` + +{{index "data structure"}} + +A _((map))_ is a way to go from values in one +domain (in this case, event names) to corresponding values in another +domain (in this case, _ϕ_ coefficients). + +There are a few potential problems with using objects like this, which +we will discuss in [Chapter 6](06_object.html#prototypes), but for +the time being, we won't worry about those. + +{{index "for/in loop", "for loop", [object, "looping over"]}} + +What if +we want to find all the events for which we have stored a coefficient? +The properties don't form a predictable series, like they would in an +array, so we cannot use a normal `for` loop. JavaScript provides a +loop construct specifically for going over the properties of an +object. It looks a little like a normal `for` loop but distinguishes +itself by the use of the word `in`. 
+ +``` +for (var event in map) + console.log("The correlation for '" + event + + "' is " + map[event]); +// → The correlation for 'pizza' is 0.069 +// → The correlation for 'touched tree' is -0.081 +``` + +{{id analysis}} +## The final analysis + +{{index journal, "weresquirrel example", "gatherCorrelations function"}} + +To find all the types of events that are present in the +data set, we simply process each entry in turn and then loop over the +events in that entry. We keep an object `phis` that has correlation +coefficients for all the event types we have seen so far. Whenever we +run across a type that isn't in the `phis` object yet, we compute its +correlation and add it to the object. + +{{test clip}} +{{includeCode "strip_log"}} + +``` +function gatherCorrelations(journal) { + var phis = {}; + for (var entry = 0; entry < journal.length; entry++) { + var events = journal[entry].events; + for (var i = 0; i < events.length; i++) { + var event = events[i]; + if (!(event in phis)) + phis[event] = phi(tableFor(event, journal)); + } + } + return phis; +} + +var correlations = gatherCorrelations(JOURNAL); +console.log(correlations.pizza); +// → 0.068599434 +``` + +{{index correlation}} + +Let's see what came out. + +{{test no}} + +``` +for (var event in correlations) + console.log(event + ": " + correlations[event]); +// → carrot: 0.0140970969 +// → exercise: 0.0685994341 +// → weekend: 0.1371988681 +// → bread: -0.0757554019 +// → pudding: -0.0648203724 +// and so on... +``` + +{{index "for/in loop"}} + +Most correlations seem to lie close to zero. Eating +carrots, bread, or pudding apparently does not trigger +squirrel-lycanthropy. It _does_ seem to occur somewhat more often on +weekends, however. Let's filter the results to show only correlations +greater than 0.1 or less than -0.1. 
+ +{{startCode}} +{{test no}} + +``` +for (var event in correlations) { + var correlation = correlations[event]; + if (correlation > 0.1 || correlation < -0.1) + console.log(event + ": " + correlation); +} +// → weekend: 0.1371988681 +// → brushed teeth: -0.3805211953 +// → candy: 0.1296407447 +// → work: -0.1371988681 +// → spaghetti: 0.2425356250 +// → reading: 0.1106828054 +// → peanuts: 0.5902679812 +``` + +A-ha! There are two factors whose ((correlation)) is clearly stronger +than the others. Eating ((peanuts)) has a strong positive effect on +the chance of turning into a squirrel, whereas brushing his teeth has +a significant negative effect. + +Interesting. Let's try something. + +{{includeCode "strip_log"}} + +``` +for (var i = 0; i < JOURNAL.length; i++) { + var entry = JOURNAL[i]; + if (hasEvent("peanuts", entry) && + !hasEvent("brushed teeth", entry)) + entry.events.push("peanut teeth"); +} +console.log(phi(tableFor("peanut teeth", JOURNAL))); +// → 1 +``` + +Well, that's unmistakable! The phenomenon occurs precisely when +Jacques eats ((peanuts)) and fails to brush his teeth. If only he +weren't such a slob about dental hygiene, he'd have never even noticed +his affliction. + +Knowing this, Jacques simply stops eating peanuts altogether and finds +that this completely puts an end to his transformations. + +{{index "weresquirrel example"}} + +All is well with Jacques for a while. But a +few years later, he loses his ((job)) and is eventually forced to take +employment with a ((circus)), where he performs as _The Incredible +Squirrelman_ by stuffing his mouth with peanut butter before every +show. One day, fed up with this pitiful existence, Jacques fails to +change back into his human form, hops through a crack in the circus +tent, and vanishes into the forest. He is never seen again. + +## Further arrayology + +{{index [array, methods], method}} + +Before finishing up this chapter, +I want to introduce you to a few more object-related concepts. 
We'll +start by introducing some generally useful array methods. + +{{index "push method", "pop method", "shift method", "unshift method"}} + +We saw `push` and `pop`, which add and remove elements at the +end of an array, [earlier](04_data.html#array_methods) in this +chapter. The corresponding methods for adding and removing things at +the start of an array are called `unshift` and `shift`. + +``` +var todoList = []; +function rememberTo(task) { + todoList.push(task); +} +function whatIsNext() { + return todoList.shift(); +} +function urgentlyRememberTo(task) { + todoList.unshift(task); +} +``` + +{{index "task management example"}} + +The previous program manages lists of +tasks. You add tasks to the end of the list by calling +`rememberTo("eat")`, and when you're ready to do something, you call +`whatIsNext()` to get (and remove) the front item from the list. The +`urgentlyRememberTo` function also adds a task but adds it to the +front instead of the back of the list. + +{{index [array, searching], "indexOf method", "lastIndexOf method"}} + +The `indexOf` method has a sibling called `lastIndexOf`, +which starts searching for the given element at the end of the array +instead of the front. + +``` +console.log([1, 2, 3, 2, 1].indexOf(2)); +// → 1 +console.log([1, 2, 3, 2, 1].lastIndexOf(2)); +// → 3 +``` + +Both `indexOf` and `lastIndexOf` take an optional second argument that +indicates where to start searching from. + +{{index "slice method", [array, indexing]}} + +Another fundamental method +is `slice`, which takes a start index and an end index and returns an +array that has only the elements between those indices. The start +index is inclusive, the end index exclusive. + +``` +console.log([0, 1, 2, 3, 4].slice(2, 4)); +// → [2, 3] +console.log([0, 1, 2, 3, 4].slice(2)); +// → [2, 3, 4] +``` + +{{index [string, indexing]}} + +When the end index is not given, `slice` +will take all of the elements after the start index. 
Strings also have +a `slice` method, which has a similar effect. + +{{index concatenation, "concat method"}} + +The `concat` method can be used +to glue arrays together, similar to what the `+` operator does for +strings. The following example shows both `concat` and `slice` in +action. It takes an array and an index, and it returns a new array +that is a copy of the original array with the element at the given +index removed. + +``` +function remove(array, index) { + return array.slice(0, index) + .concat(array.slice(index + 1)); +} +console.log(remove(["a", "b", "c", "d", "e"], 2)); +// → ["a", "b", "d", "e"] +``` + +## Strings and their properties + +{{index [string, properties]}} + +We can read properties like `length` and +`toUpperCase` from string values. But if you try to add a new +property, it doesn't stick. + +``` +var myString = "Fido"; +myString.myProperty = "value"; +console.log(myString.myProperty); +// → undefined +``` + +Values of type string, number, and Boolean are not objects, and though +the language doesn't complain if you try to set new properties on +them, it doesn't actually store those properties. The values are +immutable and cannot be changed. + +{{index [string, methods], "slice method", "indexOf method", [string, searching]}} + +But these types do have some built-in +properties. Every string value has a number of methods. The most +useful ones are probably `slice` and `indexOf`, which resemble the +array methods of the same name. + +``` +console.log("coconuts".slice(4, 7)); +// → nut +console.log("coconut".indexOf("u")); +// → 5 +``` + +One difference is that a string's `indexOf` can take a string +containing more than one character, whereas the corresponding array +method looks only for a single element. + +``` +console.log("one two three".indexOf("ee")); +// → 11 +``` + +{{index whitespace, "trim method"}} + +The `trim` method removes whitespace +(spaces, newlines, tabs, and similar characters) from the start and +end of a string. 
+ +``` +console.log(" okay \n ".trim()); +// → okay +``` + +{{index ["length property", "for string"], "charAt method", [string, indexing]}} + +We have already seen the string type's +`length` property. Accessing the individual characters in a string can +be done with the `charAt` method but also by simply reading numeric +properties, like you'd do for an array. + +``` +var string = "abc"; +console.log(string.length); +// → 3 +console.log(string.charAt(0)); +// → a +console.log(string[1]); +// → b +``` + +{{id arguments_object}} +## The arguments object + +{{index "arguments object", "length property", parameter, "optional argument", "array-like object"}} + +Whenever a function is called, a special variable named +`arguments` is added to the environment in which the function body +runs. This variable refers to an object that holds all of the +arguments passed to the function. Remember that in JavaScript you are +allowed to pass more (or fewer) arguments to a function than the +number of parameters the function itself declares. + +``` +function noArguments() {} +noArguments(1, 2, 3); // This is okay +function threeArguments(a, b, c) {} +threeArguments(); // And so is this +``` + +{{index "length property"}} + +The `arguments` object has a `length` property +that tells us the number of arguments that were really passed to the +function. It also has a property for each argument, named 0, 1, 2, and +so on. + +{{index [array, methods]}} + +{{indexsee "pseudo array", "array-like object"}} + +If that sounds a lot like an array to you, +you're right, it _is_ a lot like an array. But this object, +unfortunately, does not have any array methods (like `slice` or +`indexOf`), so it is a little harder to use than a real array. + +``` +function argumentCounter() { + console.log("You gave me", arguments.length, "arguments."); +} +argumentCounter("Straw man", "Tautology", "Ad hominem"); +// → You gave me 3 arguments. 
+``` + +{{index journal, "console.log", "variadic function"}} + +Some functions +can take any number of arguments, like `console.log`. These typically +loop over the values in their `arguments` object. They can be used to +create very pleasant interfaces. For example, remember how we created +the entries to Jacques’ journal. + +``` +addEntry(["work", "touched tree", "pizza", "running", + "television"], false); +``` + +Since he is going to be calling this function a lot, we could create +an alternative that is easier to call. + +``` +function addEntry(squirrel) { + var entry = {events: [], squirrel: squirrel}; + for (var i = 1; i < arguments.length; i++) + entry.events.push(arguments[i]); + journal.push(entry); +} +addEntry(true, "work", "touched tree", "pizza", + "running", "television"); +``` + +{{index ["arguments object", indexing]}} + +This version reads its first argument +(`squirrel`) in the normal way and then goes over the rest of the +arguments (the loop starts at index 1, skipping the first) to gather +them into an array. + +## The Math object + +{{index "Math object", "Math.min function", "Math.max function", "Math.sqrt function", minimum, maximum, "square root"}} + +As we've seen, `Math` is a grab-bag of number-related utility +functions, such as `Math.max` (maximum), `Math.min` (minimum), and +`Math.sqrt` (square root). + +{{index namespace, "namespace pollution", object}} + +{{id namespace_pollution}} +The +`Math` object is used simply as a container to group a bunch of +related functionality. There is only one `Math` object, and it is +almost never useful as a value. Rather, it provides a _namespace_ so +that all these functions and values do not have to be global +variables. + +{{index [variable, naming]}} + +Having too many global variables “pollutes” the +namespace. The more names that have been taken, the more likely you +are to accidentally overwrite the value of some variable. 
For example, +it's not unlikely that you'll want to name something `max` in one of +your programs. Since JavaScript's built-in `max` function is tucked +safely inside the `Math` object, we don't have to worry about +overwriting it. + +Many languages will stop you, or at least warn you, when you are +defining a variable with a name that is already taken. JavaScript does +neither, so be careful. + +{{index "Math.cos function", "Math.sin function", "Math.tan function", "Math.acos function", "Math.asin function", "Math.atan function", "Math.PI constant", cosine, sine, tangent, "PI constant", pi}} + +Back to +the `Math` object. If you need to do ((trigonometry)), `Math` can +help. It contains `cos` (cosine), `sin` (sine), and `tan` (tangent), +as well as their inverse functions, `acos`, `asin`, and `atan`, respectively. The +number π (pi)—or at least the closest approximation that fits in a +JavaScript number—is available as `Math.PI`. (There is an old +programming tradition of writing the names of ((constant)) values in +all caps.) + +{{test no}} + +``` +function randomPointOnCircle(radius) { + var angle = Math.random() * 2 * Math.PI; + return {x: radius * Math.cos(angle), + y: radius * Math.sin(angle)}; +} +console.log(randomPointOnCircle(2)); +// → {x: 0.3667, y: 1.966} +``` + +If sines and cosines are not something you are very familiar with, +don't worry. When they are used in this book, in +[Chapter 13](13_dom.html#sin_cos), I'll explain them. + +{{index "Math.random function", "random number"}} + +The previous example +uses `Math.random`. This is a function that returns a new +pseudorandom number between zero (inclusive) and one (exclusive) +every time you call it. 
+ +{{test no}} + +``` +console.log(Math.random()); +// → 0.36993729369714856 +console.log(Math.random()); +// → 0.727367032552138 +console.log(Math.random()); +// → 0.40180766698904335 +``` + +{{index "pseudorandom number", "random number"}} + +Though computers are +deterministic machines—they always react the same way if given the +same input—it is possible to have them produce numbers that appear +random. To do this, the machine keeps a number (or a bunch of numbers) +in its internal state. Then, every time a random number is requested, +it performs some complicated deterministic computations on this +internal state and returns part of the result of those computations. +The machine also uses the outcome to change its own internal state so +that the next “random” number produced will be different. + +{{index rounding, "Math.floor function"}} + +If we want a whole random +number instead of a fractional one, we can use `Math.floor` (which +rounds down to the nearest whole number) on the result of +`Math.random`. + +{{test no}} + +``` +console.log(Math.floor(Math.random() * 10)); +// → 2 +``` + +Multiplying the random number by 10 gives us a number greater than or +equal to zero, and below 10. Since `Math.floor` rounds down, this +expression will produce, with equal chance, any number from 0 through +9. + +{{index "Math.ceil function", "Math.round function"}} + +There are also the +functions `Math.ceil` (for “ceiling”, which rounds up to a whole +number) and `Math.round` (to the nearest whole number). + +## The global object + +{{index "global object", "window variable", "global scope", scope, object}} + +The global scope, the space in which +global variables live, can also be approached as an object in +JavaScript. Each global variable is present as a ((property)) of this +object. In ((browser))s, the global scope object is stored in the +`window` variable. 
+ +{{test no}} + +``` +var myVar = 10; +console.log("myVar" in window); +// → true +console.log(window.myVar); +// → 10 +``` + +## Summary + +Objects and arrays (which are a specific kind of object) provide ways +to group several values into a single value. Conceptually, this allows +us to put a bunch of related things in a bag and run around with the +bag, instead of trying to wrap our arms around all of the individual +things and trying to hold on to them separately. + +Most values in JavaScript have properties, the exceptions being `null` +and `undefined`. Properties are accessed using `value.propName` or +`value["propName"]`. Objects tend to use names for their properties +and store more or less a fixed set of them. Arrays, on the other hand, +usually contain varying numbers of conceptually identical values and +use numbers (starting from 0) as the names of their properties. + +There _are_ some named properties in arrays, such as `length` and a +number of methods. Methods are functions that live in properties and +(usually) act on the value they are a property of. + +Objects can also serve as maps, associating values with names. The `in` +operator can be used to find out whether an object contains a property with +a given name. The same keyword can also be used in a `for` loop +(`for (var name in object)`) to loop over an object's properties. + +## Exercises + +### The sum of a range + +{{index "summing (exercise)"}} + +The [introduction](00_intro.html#intro) of this book alluded to the +following as a nice way to compute the sum of a range of numbers: + +{{test no}} + +``` +console.log(sum(range(1, 10))); +``` + +{{index "range function", "sum function"}} + +Write a `range` function that +takes two arguments, `start` and `end`, and returns an array +containing all the numbers from `start` up to (and including) `end`. + +Next, write a `sum` function that takes an array of numbers and +returns the sum of these numbers. 
Run the previous program and see +whether it does indeed return 55. + +{{index "optional argument"}} + +As a bonus assignment, modify your `range` +function to take an optional third argument that indicates the “step” +value used to build up the array. If no step is given, the array +elements go up by increments of one, corresponding to the old +behavior. The function call `range(1, 10, 2)` should return `[1, 3, 5, +7, 9]`. Make sure it also works with negative step values so that +`range(5, 2, -1)` produces `[5, 4, 3, 2]`. + +{{if interactive + +{{test no}} + +``` +// Your code here. + +console.log(range(1, 10)); +// → [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] +console.log(range(5, 2, -1)); +// → [5, 4, 3, 2] +console.log(sum(range(1, 10))); +// → 55 +``` +if}} + +{{hint + +{{index "summing (exercise)", [array, creation], "square brackets"}} + +Building up an array is most easily done by first +initializing a variable to `[]` (a fresh, empty array) and repeatedly +calling its `push` method to add a value. Don't forget to return the +array at the end of the function. + +{{index [array, indexing], comparison}} + +Since the end boundary is +inclusive, you'll need to use the `<=` operator rather than simply `<` +to check for the end of your loop. + +{{index "arguments object"}} + +To check whether the optional step argument was +given, either check `arguments.length` or compare the value of the +argument to `undefined`. If it wasn't given, simply set it to its +((default value)) (1) at the top of the function. + +{{index "range function", "for loop"}} + +Having `range` understand negative +step values is probably best done by writing two separate loops—one +for counting up and one for counting down—because the comparison that +checks whether the loop is finished needs to be `>=` rather than `<=` +when counting downward. + +It might also be worthwhile to use a different default step, namely, +-1, when the end of the range is smaller than the start. 
That way, +`range(5, 2)` returns something meaningful, rather than getting stuck +in an ((infinite loop)). + +hint}} + +### Reversing an array + +{{index "reversing (exercise)", "reverse method", [array, methods]}} + +Arrays have a method `reverse`, which +changes the array by inverting the order in which its elements appear. +For this exercise, write two functions, `reverseArray` and +`reverseArrayInPlace`. The first, `reverseArray`, takes an array as +argument and produces a _new_ array that has the same elements in the +inverse order. The second, `reverseArrayInPlace`, does what the +`reverse` method does: it modifies the array given as argument in +order to reverse its elements. Neither may use the standard +`reverse` method. + +{{index efficiency, "pure function", "side effect"}} + +Thinking back to +the notes about side effects and pure functions in the +[previous chapter](03_functions.html#pure), which variant do you +expect to be useful in more situations? Which one is more efficient? + +{{if interactive + +{{test no}} + +``` +// Your code here. + +console.log(reverseArray(["A", "B", "C"])); +// → ["C", "B", "A"] +var arrayValue = [1, 2, 3, 4, 5]; +reverseArrayInPlace(arrayValue); +console.log(arrayValue); +// → [5, 4, 3, 2, 1] +``` +if}} + +{{hint + +{{index "reversing (exercise)"}} + +There are two obvious ways to implement +`reverseArray`. The first is to simply go over the input array from +front to back and use the `unshift` method on the new array to insert +each element at its start. The second is to loop over the input array +backward and use the `push` method. Iterating over an array backward +requires a (somewhat awkward) `for` specification like `(var i = +array.length - 1; i >= 0; i--)`. + +Reversing the array in place is harder. You have to be careful not to +overwrite elements that you will later need. Using `reverseArray` or +otherwise copying the whole array (`array.slice(0)` is a good way to +copy an array) works but is cheating.
+ +The trick is to _swap_ the first and last elements, then the +second and second-to-last, and so on. You can do this by looping +over half the length of the array (use `Math.floor` to round down—you +don't need to touch the middle element in an array with an odd +length) and swapping the element at position `i` with the one at +position `array.length - 1 - i`. You can use a local variable to +briefly hold on to one of the elements, overwrite that one with its +mirror image, and then put the value from the local variable in the +place where the mirror image used to be. + +hint}} + +{{id list}} +### A list + +{{index "data structure", "list (exercise)", "linked list", object, array, collection}} + +Objects, as generic +blobs of values, can be used to build all sorts of data structures. A +common data structure is the _list_ (not to be confused with the +array). A list is a nested set of objects, with the first object +holding a reference to the second, the second to the third, and so on. + +// include_code + +``` +var list = { + value: 1, + rest: { + value: 2, + rest: { + value: 3, + rest: null + } + } +}; +``` + +The resulting objects form a chain, like this: + +{{figure {url: "img/linked-list.svg", alt: "A linked list",width: "6cm"}}} + +{{index "structure sharing", memory}} + +A nice thing about lists is that +they can share parts of their structure. For example, if I create two +new values `{value: 0, rest: list}` and `{value: -1, rest: list}` +(with `list` referring to the variable defined earlier), they are both +independent lists, but they share the structure that makes up their +last three elements. In addition, the original list is also still a +valid three-element list. + +Write a function `arrayToList` that builds up a data structure like +the previous one when given `[1, 2, 3]` as argument, and write a +`listToArray` function that produces an array from a list. 
Also write +the helper functions `prepend`, which takes an element and a list and +creates a new list that adds the element to the front of the input +list, and `nth`, which takes a list and a number and returns the +element at the given position in the list, or `undefined` when there +is no such element. + +{{index recursion}} + +If you haven't already, also write a recursive version +of `nth`. + +{{if interactive + +{{test no}} + +``` +// Your code here. + +console.log(arrayToList([10, 20])); +// → {value: 10, rest: {value: 20, rest: null}} +console.log(listToArray(arrayToList([10, 20, 30]))); +// → [10, 20, 30] +console.log(prepend(10, prepend(20, null))); +// → {value: 10, rest: {value: 20, rest: null}} +console.log(nth(arrayToList([10, 20, 30]), 1)); +// → 20 +``` +if}} + +{{hint + +{{index "list (exercise)", "linked list"}} + +Building up a list is best done +back to front. So `arrayToList` could iterate over the array backward +(see previous exercise) and, for each element, add an object to the +list. You can use a local variable to hold the part of the list that +was built so far and use a pattern like `list = {value: X, rest: +list}` to add an element. + +{{index "for loop"}} + +To run over a list (in `listToArray` and `nth`), a `for` +loop specification like this can be used: + +``` +for (var node = list; node; node = node.rest) {} +``` + +Can you see how that works? Every iteration of the loop, `node` points +to the current sublist, and the body can read its `value` property to +get the current element. At the end of an iteration, `node` moves to +the next sublist. When that is null, we have reached the end of the +list and the loop is finished. + +{{index recursion}} + +The recursive version of `nth` will, similarly, look at +an ever smaller part of the “tail” of the list and at the same time +count down the index until it reaches zero, at which point it can +return the `value` property of the node it is looking at. 
To get the +zeroth element of a list, you simply take the `value` property of its +head node. To get element _N_ + 1, you take the _N_th element of the +list that's in this list's `rest` property. + +hint}} + +{{id exercise_deep_compare}} +### Deep comparison + +{{index "deep comparison (exercise)", comparison, "deep comparison", "== operator"}} + +The `==` operator compares objects by +identity. But sometimes, you would prefer to compare the values of +their actual properties. + +Write a function, `deepEqual`, that takes two values and returns true +only if they are the same value or are objects with the same +properties whose values are also equal when compared with a recursive +call to `deepEqual`. + +{{index null, "=== operator", "typeof operator"}} + +To find out whether +to compare two things by identity (use the `===` operator for that) or +by looking at their properties, you can use the `typeof` operator. If +it produces `"object"` for both values, you should do a deep +comparison. But you have to take one silly exception into account: by +a historical accident, `typeof null` also produces `"object"`. + +{{if interactive + +{{test no}} + +``` +// Your code here. + +var obj = {here: {is: "an"}, object: 2}; +console.log(deepEqual(obj, obj)); +// → true +console.log(deepEqual(obj, {here: 1, object: 2})); +// → false +console.log(deepEqual(obj, {here: {is: "an"}, object: 2})); +// → true +``` +if}} + +{{hint + +{{index "deep comparison (exercise)", "typeof operator", object, "=== operator"}} + +Your test for whether you are dealing with a +real object will look something like `typeof x == "object" && x != +null`. Be careful to compare properties only when _both_ arguments are +objects. In all other cases you can just immediately return the result +of applying `===`. + +{{index "for/in loop", "in operator"}} + +Use a `for`/`in` loop to go over the +properties.
You need to test whether both objects have the same set of +property names and whether those properties have identical values. The +first test can be done by counting the properties in both objects and +returning false if the numbers of properties are different. If they're +the same, then go over the properties of one object, and for each of +them, verify that the other object also has the property. The values +of the properties are compared by a recursive call to `deepEqual`. + +{{index "return value"}} + +Returning the correct value from the function is +best done by immediately returning false when a mismatch is noticed +and returning true at the end of the function. + +hint}} + diff --git a/05_higher_order.md b/05_higher_order.md new file mode 100644 index 000000000..e5d8e29e2 --- /dev/null +++ b/05_higher_order.md @@ -0,0 +1,1194 @@ +{{meta {chap_num: 5, prev_link: 04_data, next_link: 06_object, load_files: ["code/ancestry.js", "code/chapter/05_higher_order.js", "code/intro.js"], zip: "node/html"}}} + +# Higher-Order Functions + +{{if interactive + +{{quote {author: "Master Yuan-Ma", title: "The Book of Programming", chapter: true} + +Tzu-li and Tzu-ssu were +boasting about the size of their latest programs. ‘Two-hundred +thousand lines,’ said Tzu-li, ‘not counting comments!’ Tzu-ssu +responded, ‘Pssh, mine is almost a *million* lines already.’ Master +Yuan-Ma said, ‘My best program has five hundred lines.’ Hearing this, +Tzu-li and Tzu-ssu were enlightened. + +quote}} + +if}} + +{{quote {author: "C.A.R. Hoare", title: "1980 ACM Turing Award Lecture", chapter: true} + +{{index "Hoare, C.A.R."}} + +There are two ways of constructing a software +design: One way is to make it so simple that there are obviously no +deficiencies, and the other way is to make it so complicated that +there are no obvious deficiencies. + +quote}} + +{{index "program size"}} + +A large program is a costly program, and not just +because of the time it takes to build. 
Size almost always involves +((complexity)), and complexity confuses programmers. Confused +programmers, in turn, tend to introduce mistakes (_((bug))s_) into +programs. A large program also provides a lot of space for these bugs +to hide, making them hard to find. + +{{index "summing example"}} + +Let's briefly go back to the final two example +programs in the introduction. The first is self-contained and six +lines long. + +``` +var total = 0, count = 1; +while (count <= 10) { + total += count; + count += 1; +} +console.log(total); +``` + +The second relies on two external functions and is one line long. + +``` +console.log(sum(range(1, 10))); +``` + +Which one is more likely to contain a bug? + +{{index "program size"}} + +If we count the size of the definitions of `sum` and +`range`, the second program is also big—even bigger than the first. +But still, I'd argue that it is more likely to be correct. + +{{index abstraction, "domain-specific language"}} + +It is more likely to +be correct because the solution is expressed in a ((vocabulary)) that +corresponds to the problem being solved. Summing a range of +numbers isn't about loops and counters. It is about ranges and sums. + +The definitions of this vocabulary (the functions `sum` and `range`) +will still involve loops, counters, and other incidental details. But +because they are expressing simpler concepts than the program as a +whole, they are easier to get right. + +## Abstraction + +In the context of programming, these kinds of vocabularies are usually +called _((abstraction))s_. Abstractions hide details and give us the +ability to talk about problems at a higher (or more abstract) level. + +{{index "recipe analogy", "pea soup"}} + +As an analogy, compare these two +recipes for pea soup: + +{{quote + +Put 1 cup of dried peas per person into a container. Add water until +the peas are well covered. Leave the peas in water for at least 12 hours. +Take the peas out of the water and put them in a cooking pan. 
Add 4 +cups of water per person. Cover the pan and keep the peas +simmering for two hours. Take half an onion per person. Cut it into +pieces with a knife. Add it to the peas. Take a stalk of celery per +person. Cut it into pieces with a knife. Add it to the peas. Take a +carrot per person. Cut it into pieces. With a knife! Add it to the +peas. Cook for 10 more minutes. + +quote}} + +And the second recipe: + +{{quote + +Per person: 1 cup dried split peas, half a chopped onion, a stalk of +celery, and a carrot. + +Soak peas for 12 hours. Simmer for 2 hours in 4 cups of water +(per person). Chop and add vegetables. Cook for 10 more minutes. + +quote}} + +{{index vocabulary}} + +The second is shorter and easier to interpret. But +you do need to understand a few more cooking-related words—_soak_, +_simmer_, _chop_, and, I guess, _vegetable_. + +When programming, we can't rely on all the words we need to be waiting +for us in the dictionary. Thus, you might fall into the pattern of the +first recipe—work out the precise steps the computer has to perform, +one by one, blind to the higher-level concepts that they express. + +{{index abstraction}} + +It has to become second nature, for a programmer, to +notice when a concept is begging to be abstracted into a new word. + +## Abstracting array traversal + +{{index array}} + +Plain functions, as we've seen them so far, are a good +way to build abstractions. But sometimes they fall short. + +{{index "for loop"}} + +In the [previous chapter](04_data.html#data), this +type of `for` ((loop)) made several appearances: + +``` +var array = [1, 2, 3]; +for (var i = 0; i < array.length; i++) { + var current = array[i]; + console.log(current); +} +``` + +{{index ["length property", "for array"], [array, indexing], readability}} + +It's trying to say, “For +each element in the array, log it to the console”. 
But it uses a +roundabout way that involves a counter variable `i`, a check against +the array's length, and an extra variable declaration to pick out the +current element. Apart from being a bit of an eyesore, this provides a +lot of space for potential mistakes. We might accidentally reuse the +`i` variable, misspell `length` as `lenght`, confuse the `i` and `current` +variables, and so on. + +So let's try to abstract this into a function. Can you think of a way? + +Well, it's easy to write a function that goes over an array and calls +`console.log` on every element. + +``` +function logEach(array) { + for (var i = 0; i < array.length; i++) + console.log(array[i]); +} +``` + +{{index [function, "higher-order"], loop, [array, traversal], [function, "as value"], "forEach method"}} + +{{id forEach}} +{{indexsee "higher-order function", "function, higher-order"}} + +But what +if we want to do something other than logging the elements? Since +“doing something” can be represented as a function and functions are +just values, we can pass our action as a function value. + +``` +function forEach(array, action) { + for (var i = 0; i < array.length; i++) + action(array[i]); +} + +forEach(["Wampeter", "Foma", "Granfalloon"], console.log); +// → Wampeter +// → Foma +// → Granfalloon +``` + +(In some browsers, calling `console.log` in this way does not work. +You can use `alert` instead of `console.log` if this example fails to +work.) + +Often, you don't pass a predefined function to `forEach` but create +a function value on the spot instead. + +``` +var numbers = [1, 2, 3, 4, 5], sum = 0; +forEach(numbers, function(number) { + sum += number; +}); +console.log(sum); +// → 15 +``` + +{{index "loop body", "curly braces"}} + +This looks quite a lot like the +classical `for` loop, with its body written as a block below it. +However, now the body is inside the function value, as well as +inside the ((parentheses)) of the call to `forEach`. 
This is why it +has to be closed with the closing brace _and_ closing parenthesis. + +{{index "local variable", parameter}} + +Using this pattern, we can +specify a variable name for the current element (`number`), rather +than having to pick it out of the array manually. + +{{index [array, methods], [function, "higher-order"], "forEach method", array}} + +In fact, we don't need to write `forEach` +ourselves. It is available as a standard method on arrays. Since the +array is already provided as the thing the method acts on, `forEach` +takes only one required argument: the function to be executed for each +element. + +To illustrate how helpful this is, let's look back at a function +from [the previous chapter](04_data.html#analysis). It contains two +array-traversing ((loop))s. + +``` +function gatherCorrelations(journal) { + var phis = {}; + for (var entry = 0; entry < journal.length; entry++) { + var events = journal[entry].events; + for (var i = 0; i < events.length; i++) { + var event = events[i]; + if (!(event in phis)) + phis[event] = phi(tableFor(event, journal)); + } + } + return phis; +} +``` + +{{index "forEach method"}} + +Working with `forEach` makes it slightly shorter +and quite a bit cleaner. + +``` +function gatherCorrelations(journal) { + var phis = {}; + journal.forEach(function(entry) { + entry.events.forEach(function(event) { + if (!(event in phis)) + phis[event] = phi(tableFor(event, journal)); + }); + }); + return phis; +} +``` + +## Higher-order functions + +{{index [function, "higher-order"], [function, "as value"]}} + +Functions that +operate on other functions, either by taking them as arguments or by +returning them, are called _higher-order functions_. If you have +already accepted the fact that functions are regular values, there is +nothing particularly remarkable about the fact that such functions +exist. The term comes from ((mathematics)), where the distinction +between functions and other values is taken more seriously. 
+ +{{index abstraction}} + +Higher-order functions allow us to abstract over +_actions_, not just values. They come in several forms. For example, +you can have functions that create new functions. + +``` +function greaterThan(n) { + return function(m) { return m > n; }; +} +var greaterThan10 = greaterThan(10); +console.log(greaterThan10(11)); +// → true +``` + +And you can have functions that change other functions. + +``` +function noisy(f) { + return function(arg) { + console.log("calling with", arg); + var val = f(arg); + console.log("called with", arg, "- got", val); + return val; + }; +} +noisy(Boolean)(0); +// → calling with 0 +// → called with 0 - got false +``` + +You can even write functions that provide new types of ((control flow)). + +``` +function unless(test, then) { + if (!test) then(); +} +function repeat(times, body) { + for (var i = 0; i < times; i++) body(i); +} + +repeat(3, function(n) { + unless(n % 2, function() { + console.log(n, "is even"); + }); +}); +// → 0 is even +// → 2 is even +``` + +{{index "inner function", [nesting, "of functions"], "{} (block)", "local variable", closure}} + +The ((lexical scoping)) +rules that we discussed in [Chapter 3](03_functions.html#scoping) +work to our advantage when using functions in this way. In the previous example, the `n` variable is a ((parameter)) to the outer function. +Because the inner function lives inside the environment of the outer +one, it can use `n`. The bodies of such inner functions can access the +variables around them. They can play a role similar to the `{}` blocks +used in regular loops and conditional statements. An important +difference is that variables declared inside inner functions do not +end up in the environment of the outer function. And that is usually a +good thing. + +## Passing along arguments + +{{index [function, wrapping], "arguments object"}} + +The `noisy` function +defined earlier, which wraps its argument in another function, has a rather +serious deficit. 
+ +``` +function noisy(f) { + return function(arg) { + console.log("calling with", arg); + var val = f(arg); + console.log("called with", arg, "- got", val); + return val; + }; +} +``` + +If `f` takes more than one ((parameter)), it gets only the first one. +We could add a bunch of arguments to the inner function (`arg1`, +`arg2`, and so on) and pass them all to `f`, but it is not clear how many +would be enough. This solution would also deprive `f` of the +information in `arguments.length`. Since we'd always pass the same +number of arguments, it wouldn't know how many arguments were +originally given. + +{{index "apply method", "array-like object", [function, application]}} + +For +these kinds of situations, JavaScript functions have an `apply` +method. You pass it an array (or array-like object) of arguments, and +it will call the function with those arguments. + +``` +function transparentWrapping(f) { + return function() { + return f.apply(null, arguments); + }; +} +``` + +{{index null}} + +That's a useless function, but it shows the pattern we are +interested in—the function it returns passes all of the given +arguments, and only those arguments, to `f`. It does this by passing +its own `arguments` object to `apply`. The first argument to `apply`, +for which we are passing `null` here, can be used to simulate a +((method)) call. We will come back to that in the +[next chapter](06_object.html#call_method). + +## JSON + +{{index array, [function, "higher-order"], "forEach method", "data set"}} + +Higher-order functions that somehow apply a function to the +elements of an array are widely used in JavaScript. The `forEach` +method is the most primitive such function. There are a number of +other variants available as methods on arrays. To familiarize +ourselves with them, let's play around with another data set. 
+ +{{index "ancestry example"}} + +A few years ago, someone crawled through a lot +of archives and put together a book on the history of my family name +(Haverbeke—meaning Oatbrook). I opened it hoping to find +knights, pirates, and alchemists ... but the book turns out to be +mostly full of Flemish ((farmer))s. For my amusement, I extracted the +information on my direct ancestors and put it into a +computer-readable format. + +{{index "data format", JSON}} + +The file I created looks something like +this: + +```application/json +[ + {"name": "Emma de Milliano", "sex": "f", + "born": 1876, "died": 1956, + "father": "Petrus de Milliano", + "mother": "Sophia van Damme"}, + {"name": "Carolus Haverbeke", "sex": "m", + "born": 1832, "died": 1905, + "father": "Carel Haverbeke", + "mother": "Maria van Brussel"}, + … and so on +] +``` + +{{index "World Wide Web"}} + +{{indexsee "JavaScript Object Notation", JSON}} + +This format is called JSON (pronounced “Jason”), +which stands for JavaScript Object Notation. It is widely used as a +data storage and communication format on the Web. + +{{index array, object, [quoting, "in JSON"], comment}} + +JSON is similar to +JavaScript's way of writing arrays and objects, with a few +restrictions. All property names have to be surrounded by double quotes, and +only simple data expressions are allowed—no function calls, +variables, or anything that involves actual computation. Comments are not +allowed in JSON. + +{{index "JSON.stringify function", "JSON.parse function", serialization, deserialization, parsing}} + +JavaScript +gives us functions, `JSON.stringify` and `JSON.parse`, that convert +data to and from this format. The first takes a JavaScript value and +returns a JSON-encoded string. The second takes such a string and +converts it to the value it encodes. 
+ +``` +var string = JSON.stringify({name: "X", born: 1980}); +console.log(string); +// → {"name":"X","born":1980} +console.log(JSON.parse(string).born); +// → 1980 +``` + +{{index "ANCESTRY_FILE data set"}} + +The variable `ANCESTRY_FILE`, available in +the ((sandbox)) for this chapter and in +[a downloadable file](http://eloquentjavascript.net/code/ancestry.js) on +the website(!book ([_eloquentjavascript.net/code#5_](http://eloquentjavascript.net/code#5))!), contains the +content of my ((JSON)) file as a string. Let's decode it and see how +many people it contains. + +{{includeCode "strip_log"}} + +``` +var ancestry = JSON.parse(ANCESTRY_FILE); +console.log(ancestry.length); +// → 39 +``` + +## Filtering an array + +{{index [array, methods], [array, filtering], "filter method", [function, "higher-order"], "predicate function"}} + +To find +the people in the ancestry data set who were young in 1924, the +following function might be helpful. It filters out the elements in an +array that don't pass a test. + +``` +function filter(array, test) { + var passed = []; + for (var i = 0; i < array.length; i++) { + if (test(array[i])) + passed.push(array[i]); + } + return passed; +} + +console.log(filter(ancestry, function(person) { + return person.born > 1900 && person.born < 1925; +})); +// → [{name: "Philibert Haverbeke", …}, …] +``` + +{{index [function, "as value"], [function, application]}} + +This uses the +argument named `test`, a function value, to fill in a “gap” in the +computation. The `test` function is called for each element, and its +return value determines whether an element is included in the returned +array. + +{{index "ancestry example"}} + +Three people in the file were alive and young in +1924: my grandfather, grandmother, and great-aunt. + +{{index "filter method", "pure function", "side effect"}} + +Note how the +`filter` function, rather than deleting elements from the existing +array, builds up a new array with only the elements that pass the +test. 
This function is _pure_. It does not modify the array it is +given. + +Like `forEach`, `filter` is also a ((standard)) method on arrays. The +example defined the function only in order to show what it does +internally. From now on, we'll use it like this instead: + +``` +console.log(ancestry.filter(function(person) { + return person.father == "Carel Haverbeke"; +})); +// → [{name: "Carolus Haverbeke", …}] +``` + +## Transforming with map + +{{index [array, methods], "map method", "ancestry example"}} + +Say we +have an array of objects representing people, produced by filtering +the `ancestry` array somehow. But we want an array of names, which is +easier to read. + +{{index [function, "higher-order"]}} + +The `map` method transforms an array by +applying a function to all of its elements and building a new array +from the returned values. The new array will have the same length as +the input array, but its content will have been “mapped” to a new form +by the function. + +{{test join}} + +``` +function map(array, transform) { + var mapped = []; + for (var i = 0; i < array.length; i++) + mapped.push(transform(array[i])); + return mapped; +} + +var overNinety = ancestry.filter(function(person) { + return person.died - person.born > 90; +}); +console.log(map(overNinety, function(person) { + return person.name; +})); +// → ["Clara Aernoudts", "Emile Haverbeke", +// "Maria Haverbeke"] +``` + +Interestingly, the people who lived to at least 90 years of age are the +same three people who we saw before—the people who were young in the +1920s, which happens to be the most recent generation in my data set. +I guess ((medicine)) has come a long way. + +Like `forEach` and `filter`, `map` is also a standard method on +arrays. + +## Summarizing with reduce + +{{index [array, methods], "summing example", "reduce method", "ancestry example"}} + +Another common pattern of computation on arrays is computing +a single value from them. 
Our recurring example, summing a collection +of numbers, is an instance of this. Another example would be finding +the person with the earliest year of birth in the data set. + +{{index [function, "higher-order"], "fold function"}} + +The higher-order +operation that represents this pattern is called _reduce_ (or +sometimes _fold_). You can think of it as folding up the array, one +element at a time. When summing numbers, you'd start with the number +zero and, for each element, combine it with the current sum by adding +the two. + +The parameters to the `reduce` function are, apart from the array, a +combining function and a start value. This function is a little less +straightforward than `filter` and `map`, so pay close attention. + +``` +function reduce(array, combine, start) { + var current = start; + for (var i = 0; i < array.length; i++) + current = combine(current, array[i]); + return current; +} + +console.log(reduce([1, 2, 3, 4], function(a, b) { + return a + b; +}, 0)); +// → 10 +``` + +{{index "reduce method"}} + +The standard array method `reduce`, which of course +corresponds to this function, has an added convenience. If your array +contains at least one element, you are allowed to leave off the +`start` argument. The method will take the first element of the array +as its start value and start reducing at the second element. + +{{index "ancestry example", minimum}} + +To use `reduce` to find my most +ancient known ancestor, we can write something like this: + +{{test no}} + +``` +console.log(ancestry.reduce(function(min, cur) { + if (cur.born < min.born) return cur; + else return min; +})); +// → {name: "Pauwels van Haverbeke", born: 1535, …} +``` + +## Composability + +{{index loop, minimum, "ancestry example"}} + +Consider how we would +have written the previous example (finding the person with the +earliest year of birth) without higher-order functions. The code is +not that much worse. 
+ +{{test no}} + +``` +var min = ancestry[0]; +for (var i = 1; i < ancestry.length; i++) { + var cur = ancestry[i]; + if (cur.born < min.born) + min = cur; +} +console.log(min); +// → {name: "Pauwels van Haverbeke", born: 1535, …} +``` + +There are a few more ((variable))s, and the program is two lines +longer but still quite easy to understand. + +{{index "average function", composability, [function, "higher-order"]}} + +{{id average_function}} +Higher-order +functions start to shine when you need to _compose_ functions. As an +example, let's write code that finds the average age for men and for +women in the data set. + +{{test clip}} + +``` +function average(array) { + function plus(a, b) { return a + b; } + return array.reduce(plus) / array.length; +} +function age(p) { return p.died - p.born; } +function male(p) { return p.sex == "m"; } +function female(p) { return p.sex == "f"; } + +console.log(average(ancestry.filter(male).map(age))); +// → 61.67 +console.log(average(ancestry.filter(female).map(age))); +// → 54.56 +``` + +{{index "plus function", "+ operator", [function, "as value"]}} + +(It's a bit +silly that we have to define `plus` as a function, but operators in +JavaScript, unlike functions, are not values, so you can't pass them +as arguments.) + +{{index abstraction, vocabulary}} + +Instead of tangling the logic into a +big ((loop)), it is neatly composed into the concepts we are +interested in—determining sex, computing age, and averaging numbers. We +can apply these one by one to get the result we are looking for. + +This is _fabulous_ for writing clear code. Unfortunately, this clarity +comes at a cost. + +## The cost + +{{index efficiency, optimization}} + +In the happy land of elegant code +and pretty rainbows, there lives a spoil-sport monster called +_inefficiency_. 
+ +{{index elegance, [array, creation], "pure function", composability}} + +A program that processes an array is most +elegantly expressed as a sequence of cleanly separated steps that each +do something with the array and produce a new array. But building up +all those intermediate arrays is somewhat expensive. + +{{index readability, [function, application], "forEach method", [function, "as value"]}} + +Likewise, passing a function to +`forEach` and letting that method handle the array iteration for us is +convenient and easy to read. But function calls in JavaScript are +costly compared to simple loop bodies. + +{{index abstraction}} + +And so it goes with a lot of techniques that help +improve the clarity of a program. Abstractions add layers between the +raw things the computer is doing and the concepts we are working with +and thus cause the machine to perform more work. This is not an iron +law—there are programming languages that have better support for +building abstractions without adding inefficiencies, and even in +JavaScript, an experienced programmer can find ways to write abstract +code that is still fast. But it is a problem that comes up a lot. + +{{index profiling}} + +Fortunately, most computers are insanely fast. If you +are processing a modest set of data or doing something that has +to happen only on a human time scale (say, every time the user clicks a +button), then it _does not matter_ whether you wrote a pretty solution +that takes half a millisecond or a super-optimized solution that takes +a tenth of a millisecond. + +{{index [nesting, "of loops"], "inner loop", complexity}} + +It is helpful to +roughly keep track of how often a piece of your program is going to +run. 
If you have a ((loop)) inside a loop (either directly or through +the outer loop calling a function that ends up performing the inner +loop), the code inside the inner loop will end up running _N_ × _M_ +times, where _N_ is the number of times the outer loop repeats and +_M_ is the number of times the inner loop repeats within each iteration +of the outer loop. If that inner loop contains another loop that makes +_P_ rounds, its body will run _M_ × _N_ × _P_ times, and so on. This +can add up to large numbers, and when a program is slow, the problem +can often be traced to only a small part of the code, which sits inside an inner loop. + +## Great-great-great-great-... + +{{index "ancestry example"}} + +My ((grandfather)), Philibert Haverbeke, is +included in the data file. By starting with him, I can trace my +lineage to find out whether the most ancient person in the data, +Pauwels van Haverbeke, is my direct ancestor. And if he is, I would +like to know how much ((DNA)) I theoretically share with him. + +{{index "byName object", map, "data structure", [object, "as map"]}} + +To be able to go from a parent's name to the actual object that +represents this person, we first build up an object that associates +names with people. + +{{includeCode "strip_log"}} + +``` +var byName = {}; +ancestry.forEach(function(person) { + byName[person.name] = person; +}); + +console.log(byName["Philibert Haverbeke"]); +// → {name: "Philibert Haverbeke", …} +``` + +Now, the problem is not entirely as simple as following the `father` +properties and counting how many we need to reach Pauwels. There are +several cases in the family ((tree)) where people married their second +cousins (tiny villages and all that). This causes the branches of the +family tree to rejoin in a few places, which means I share more than +1/2^_G_^ of my genes with this person, where _G_ stands for the number of +generations between Pauwels and me. 
This formula comes from the idea +that each generation splits the gene pool in two. + +{{index "reduce method", "data structure"}} + +A reasonable way to think about +this problem is to look at it as being analogous to `reduce`, which +condenses an array to a single value by repeatedly combining +values, left to right. In this case, we also want to condense our data +structure to a single value but in a way that follows family +lines. The _shape_ of the data is that of a family tree, rather than a +flat list. + +The way we want to reduce this shape is by computing a value for a +given person by combining values from their ancestors. This can be +done recursively: if we are interested in person _A_, we have to +compute the values for _A_’s parents, which in turn requires us to +compute the value for _A_’s grandparents, and so on. In principle, +that'd require us to look at an infinite number of people, but since +our data set is finite, we have to stop somewhere. We'll allow a +((default value)) to be given to our reduction function, which will be +used for people who are not in the data. In our case, that value is +simply zero, on the assumption that people not in the list don't share +DNA with the ancestor we are looking at. + +{{index recursion, "reduceAncestors function"}} + +Given a person, a +function to combine values from the two parents of a given person, and +a default value, `reduceAncestors` condenses a value from a family +tree. + +// include_code + +``` +function reduceAncestors(person, f, defaultValue) { + function valueFor(person) { + if (person == null) + return defaultValue; + else + return f(person, valueFor(byName[person.mother]), + valueFor(byName[person.father])); + } + return valueFor(person); +} +``` + +{{index [function, "higher-order"]}} + +The inner function (`valueFor`) handles a +single person. Through the ((magic)) of recursion, it can simply call +itself to handle the father and the mother of this person. 
The +results, along with the person object itself, are passed to `f`, which +returns the actual value for this person. + +We can then use this to compute the amount of ((DNA)) my +((grandfather)) shared with Pauwels van Haverbeke and divide that by +four. + +{{startCode "bottom_lines: 2"}} +{{test clip}} +{{includeCode "top_lines: 6"}} + +``` +function sharedDNA(person, fromMother, fromFather) { + if (person.name == "Pauwels van Haverbeke") + return 1; + else + return (fromMother + fromFather) / 2; +} +var ph = byName["Philibert Haverbeke"]; +console.log(reduceAncestors(ph, sharedDNA, 0) / 4); +// → 0.00049 +``` + +The person with the name Pauwels van Haverbeke obviously shared 100 percent +of his DNA with Pauwels van Haverbeke (there are no people who share +names in the data set), so the function returns 1 for him. All other +people share the average of the amounts that their parents share. + +So, statistically speaking, I share about 0.05 percent of my ((DNA)) with +this 16th-century person. It should be noted that this is only a +statistical approximation, not an exact amount. It is a rather small +number, but given how much genetic material we carry (about 3 billion +base pairs), there's still probably some aspect in the biological +machine that is me that originates with Pauwels. + +{{index "ancestry example", "reduceAncestors function", abstraction}} + +We could also have computed this number +without relying on `reduceAncestors`. But separating the general +approach (condensing a family tree) from the specific case (computing +shared DNA) can improve the clarity of the code and allows us to reuse +the abstract part of the program for other cases. 
For example, the +following code finds the percentage of a person's known ancestors who +lived past 70 (by lineage, so people may be counted multiple times): + +{{test clip}} + +``` +function countAncestors(person, test) { + function combine(current, fromMother, fromFather) { + var thisOneCounts = current != person && test(current); + return fromMother + fromFather + (thisOneCounts ? 1 : 0); + } + return reduceAncestors(person, combine, 0); +} +function longLivingPercentage(person) { + var all = countAncestors(person, function(person) { + return true; + }); + var longLiving = countAncestors(person, function(person) { + return (person.died - person.born) >= 70; + }); + return longLiving / all; +} +console.log(longLivingPercentage(byName["Emile Haverbeke"])); +// → 0.129 +``` + +Such numbers are not to be taken too seriously, given that +our data set contains a rather arbitrary collection of people. But the +code illustrates the fact that `reduceAncestors` gives us a useful +piece of ((vocabulary)) for working with the family tree data +structure. + +## Binding + +{{index "bind method", "partial application", [function, application]}} + +The `bind` method, which all +functions have, creates a new function that will call the original +function but with some of the arguments already fixed. + +{{index "filter method", [function, "as value"]}} + +The following code shows an +example of `bind` in use. It defines a function `isInSet` that +tells us whether a person is in a given set of strings. To call +`filter` in order to collect those person objects whose names are in a +specific set, we can either write a function expression that makes a +call to `isInSet` with our set as its first argument or _partially +apply_ the `isInSet` function. 
+ +``` +var theSet = ["Carel Haverbeke", "Maria van Brussel", + "Donald Duck"]; +function isInSet(set, person) { + return set.indexOf(person.name) > -1; +} + +console.log(ancestry.filter(function(person) { + return isInSet(theSet, person); +})); +// → [{name: "Maria van Brussel", …}, +// {name: "Carel Haverbeke", …}] +console.log(ancestry.filter(isInSet.bind(null, theSet))); +// → … same result +``` + +The call to `bind` returns a function that will call `isInSet` with +`theSet` as first argument, followed by any remaining arguments given +to the bound function. + +{{index null}} + +The first argument, where the example passes `null`, is used +for ((method call))s, similar to the first argument to `apply`. I'll +describe this in more detail in the +[next chapter](06_object.html#call_method). + +## Summary + +Being able to pass function values to other functions is not just a +gimmick but a deeply useful aspect of JavaScript. It allows us to +write computations with “gaps” in them as functions and have the code +that calls these functions fill in those gaps by providing function +values that describe the missing computations. + +Arrays provide a number of useful higher-order methods—`forEach` +to do something with each element in an array, `filter` to build a new +array with some elements filtered out, `map` to build a new array +where each element has been put through a function, and `reduce` to +combine all an array's elements into a single value. + +Functions have an `apply` method that can be used to call them with an +array specifying their arguments. They also have a `bind` method, +which is used to create a partially applied version of the function. + +## Exercises + +### Flattening + +{{index "flattening (exercise)", "reduce method", "concat method", array}} + +Use the `reduce` method in combination with +the `concat` method to “flatten” an array of arrays into a single +array that has all the elements of the input arrays. 
+ +{{if interactive + +{{test no}} + +``` +var arrays = [[1, 2, 3], [4, 5], [6]]; +// Your code here. +// → [1, 2, 3, 4, 5, 6] +``` +if}} + +### Mother-child age difference + +{{index "ancestry example", "age difference (exercise)", "average function"}} + +Using the example data set from this chapter, compute the +average age difference between mothers and children (the age of the +mother when the child is born). You can use the `average` function +defined [earlier](05_higher_order.html#average_function) in this +chapter. + +{{index "byName object"}} + +Note that not all the mothers mentioned in the data +are themselves present in the array. The `byName` object, which makes +it easy to find a person's object from their name, might be useful +here. + +{{if interactive + +{{test no}} +// include_code + +``` +function average(array) { + function plus(a, b) { return a + b; } + return array.reduce(plus) / array.length; +} + +var byName = {}; +ancestry.forEach(function(person) { + byName[person.name] = person; +}); + +// Your code here. + +// → 31.2 +``` +if}} + +{{hint + +{{index "age difference (exercise)", "filter method", "map method", null, "average function"}} + +Because not all elements in +the `ancestry` array produce useful data (we can't compute the age +difference unless we know the birth date of the mother), we will have +to apply `filter` in some manner before calling `average`. You could +do it as a first pass, by defining a `hasKnownMother` function and +filtering on that first. Alternatively, you could start by calling +`map` and in your mapping function return either the age difference +or `null` if no mother is known. Then, you can call `filter` to remove +the `null` elements before passing the array to `average`. + +hint}} + +### Historical life expectancy + +{{index "life expectancy (exercise)"}} + +When we looked up all the people in +our data set that lived more than 90 years, only the latest +generation in the data came out. 
Let's take a closer look at that +phenomenon. + +{{index "average function"}} + +Compute and output the average age of the people +in the ancestry data set per century. A person is assigned to a +((century)) by taking their year of death, dividing it by 100, +and rounding it up, as in `Math.ceil(person.died / 100)`. + +{{if interactive + +{{test no}} + +``` +function average(array) { + function plus(a, b) { return a + b; } + return array.reduce(plus) / array.length; +} + +// Your code here. + +// → 16: 43.5 +// 17: 51.2 +// 18: 52.8 +// 19: 54.8 +// 20: 84.7 +// 21: 94 +``` +if}} + +{{hint + +{{index "life expectancy (exercise)"}} + +The essence of this example lies in +((grouping)) the elements of a collection by some aspect of +theirs—splitting the array of ancestors into smaller arrays with the +ancestors for each century. + +{{index array, map, [object, "as map"]}} + +During the grouping +process, keep an object that associates ((century)) names (numbers) +with arrays of either person objects or ages. Since we do not know in +advance what categories we will find, we'll have to create them on the +fly. For each person, after computing their century, we test whether +that century was already known. If not, add an array for it. Then add +the person (or age) to the array for the proper century. + +{{index "for/in loop", "average function"}} + +Finally, a `for`/`in` loop can +be used to print the average ages for the individual centuries. + +hint}} + +{{index grouping, map, [object, "as map"], "groupBy function"}} + +For +bonus points, write a function `groupBy` that abstracts the grouping +operation. It should accept as arguments an array and a function that +computes the group for an element in the array and returns an object +that maps group names to arrays of group members. 
+ +### Every and then some + +{{index "predicate function", "every and some (exercise)", "every method", "some method", [array, methods], "&& operator", "|| operator"}} + +Arrays also come with the standard methods `every` and +`some`. Both take a predicate function that, when called with an array +element as argument, returns true or false. Just like `&&` +returns a true value only when the expressions on both sides are true, +`every` returns true only when the predicate returns true for _all_ +elements of the array. Similarly, `some` returns true as soon as the +predicate returns true for _any_ of the elements. They do not process +more elements than necessary—for example, if `some` finds that the +predicate holds for the first element of the array, it will not look +at the values after that. + +Write two functions, `every` and `some`, that behave like these +methods, except that they take the array as their first argument +rather than being a method. + +{{if interactive + +{{test no}} + +``` +// Your code here. + +console.log(every([NaN, NaN, NaN], isNaN)); +// → true +console.log(every([NaN, NaN, 4], isNaN)); +// → false +console.log(some([NaN, 3, 4], isNaN)); +// → true +console.log(some([2, 3, 4], isNaN)); +// → false +``` +if}} + +{{hint + +{{index "every and some (exercise)", "short-circuit evaluation", "return keyword"}} + +The functions can follow a similar pattern to the +[definition](05_higher_order.html#forEach) of `forEach` at the +start of the chapter, except that they must return immediately (with +the right value) when the predicate function returns false—or true. +Don't forget to put another `return` statement after the loop so that +the function also returns the correct value when it reaches the end of +the array. + +hint}} + diff --git a/05_higher_order.txt b/05_higher_order.txt index ef6edb5ed..b1c35a600 100644 --- a/05_higher_order.txt +++ b/05_higher_order.txt @@ -96,6 +96,7 @@ peas. Cook for 10 more minutes. 
____ And the second recipe: + ____ Per person: 1 cup dried split peas, half a chopped onion, a stalk of celery, and a carrot. diff --git a/06_object.md b/06_object.md new file mode 100644 index 000000000..3b2883cd1 --- /dev/null +++ b/06_object.md @@ -0,0 +1,1342 @@ +{{meta {chap_num: 6, prev_link: 05_higher_order, next_link: 07_elife, load_files: ["code/mountains.js", "code/chapter/06_object.js"], zip: "node/html"}}} + +# The Secret Life of Objects + +{{quote {author: "Joe Armstrong", title: "interviewed in Coders at Work", chapter: true} + +The problem with object-oriented languages +is they’ve got all this implicit environment that they carry around +with them. You wanted a banana but what you got was a gorilla holding +the banana and the entire jungle. + +quote}} + +{{index "Armstrong, Joe", object, "holy war"}} + +When a programmer +says “object”, this is a loaded term. In my profession, objects are a +way of life, the subject of holy wars, and a beloved buzzword that +still hasn't quite lost its power. + +To an outsider, this is probably a little confusing. Let's start with +a brief ((history)) of objects as a programming construct. + +## History + +{{index isolation, history, "object-oriented programming", object}} + +This story, like most programming stories, starts with the +problem of ((complexity)). One philosophy is that complexity can be +made manageable by separating it into small compartments that are +isolated from each other. These compartments have ended up with the +name _objects_. + +{{index complexity, encapsulation, method, interface}} + +{{id interface}} +An +object is a hard shell that hides the gooey complexity inside it +and instead offers us a few knobs and connectors (such as ((method))s) +that present an _interface_ through which the object is to be used. +The idea is that the interface is relatively simple and all the +complex things going on _inside_ the object can be ignored when +working with it. 
+ +{{figure {url: "img/object.jpg", alt: "A simple interface can hide a lot of complexity.",width: "6cm"}}} + +As an example, you can imagine an object that provides an interface to +an area on your screen. It provides a way to draw shapes or text onto +this area but hides all the details of how these shapes are converted +to the actual pixels that make up the screen. You'd have a set of +methods—for example, _drawCircle_—and those are the only things you +need to know in order to use such an object. + +{{index "object-oriented programming"}} + +These ideas were initially worked out +in the 1970s and 1980s and, in the 1990s, were carried up by a huge wave +of ((hype))—the object-oriented programming revolution. Suddenly, +there was a large tribe of people declaring that objects were the +_right_ way to program—and that anything that did not involve objects +was outdated nonsense. + +That kind of zealotry always produces a lot of impractical silliness, +and there has been a sort of counter-revolution since then. In some +circles, objects have a rather bad reputation nowadays. + +I prefer to look at the issue from a practical, rather than +ideological, angle. There are several useful concepts, most +importantly that of _((encapsulation))_ (distinguishing between +internal complexity and external interface), that the object-oriented +culture has popularized. These are worth studying. + +This chapter describes JavaScript's rather eccentric take on objects +and the way they relate to some classical object-oriented techniques. + +{{id obj_methods}} +## Methods + +{{index "rabbit example", method, property}} + +Methods are simply +properties that hold function values. This is a simple method: + +``` +var rabbit = {}; +rabbit.speak = function(line) { + console.log("The rabbit says '" + line + "'"); +}; + +rabbit.speak("I'm alive."); +// → The rabbit says 'I'm alive.' 
+``` + +{{index this, "method call"}} + +Usually a method needs to do something with +the object it was called on. When a function is called as a +method—looked up as a property and immediately called, as in +_object.method()_—the special variable `this` in its body will point +to the object that it was called on. + +{{test join}} +{{includeCode "top_lines:6"}} + +``` +function speak(line) { + console.log("The " + this.type + " rabbit says '" + + line + "'"); +} +var whiteRabbit = {type: "white", speak: speak}; +var fatRabbit = {type: "fat", speak: speak}; + +whiteRabbit.speak("Oh my ears and whiskers, " + + "how late it's getting!"); +// → The white rabbit says 'Oh my ears and whiskers, how +// late it's getting!' +fatRabbit.speak("I could sure use a carrot right now."); +// → The fat rabbit says 'I could sure use a carrot +// right now.' +``` + +{{index "apply method", "bind method", this, "rabbit example"}} + +The +code uses the `this` keyword to output the type of rabbit that is +speaking. Recall that the `apply` and `bind` methods both take a first +argument that can be used to simulate method calls. This first +argument is in fact used to give a value to `this`. + +{{index "call method"}} + +{{id call_method}} +There is a method similar to `apply`, called `call`. +It also calls the function it is a method of but takes its arguments +normally, rather than as an array. Like `apply` and `bind`, `call` can +be passed a specific `this` value. + +``` +speak.apply(fatRabbit, ["Burp!"]); +// → The fat rabbit says 'Burp!' +speak.call({type: "old"}, "Oh my."); +// → The old rabbit says 'Oh my.' +``` + +{{id prototypes}} +## Prototypes + +{{index "toString method"}} + +Watch closely. + +``` +var empty = {}; +console.log(empty.toString); +// → function toString(){…} +console.log(empty.toString()); +// → [object Object] +``` + +{{index magic}} + +I just pulled a property out of an empty object. Magic! + +{{index property, object}} + +Well, not really. 
I have simply been +withholding information about the way JavaScript objects work. In +addition to their set of properties, almost all objects also have a +_prototype_. A ((prototype)) is another object that is used as a +fallback source of properties. When an object gets a request for a +property that it does not have, its prototype will be searched for the +property, then the prototype's prototype, and so on. + +{{index "Object prototype"}} + +So who is the ((prototype)) of that empty +object? It is the great ancestral prototype, the entity behind almost +all objects, `Object.prototype`. + +``` +console.log(Object.getPrototypeOf({}) == + Object.prototype); +// → true +console.log(Object.getPrototypeOf(Object.prototype)); +// → null +``` + +{{index "getPrototypeOf function"}} + +As you might expect, the +`Object.getPrototypeOf` function returns the prototype of an object. + +{{index "toString method"}} + +The prototype relations of JavaScript objects +form a ((tree))-shaped structure, and at the root of this structure +sits `Object.prototype`. It provides a few ((method))s that show up in +all objects, such as `toString`, which converts an object to a string +representation. + +{{index inheritance, "Function prototype", "Array prototype", "Object prototype"}} + +Many objects don't directly have +`Object.prototype` as their ((prototype)), but instead have another +object, which provides its own default properties. Functions derive +from `Function.prototype`, and arrays derive from `Array.prototype`. + +``` +console.log(Object.getPrototypeOf(isNaN) == + Function.prototype); +// → true +console.log(Object.getPrototypeOf([]) == + Array.prototype); +// → true +``` + +{{index "Object prototype"}} + +Such a prototype object will itself have a +prototype, often `Object.prototype`, so that it still indirectly +provides methods like `toString`. 
+ +{{index "getPrototypeOf function", "rabbit example", "Object.create function"}} + +The `Object.getPrototypeOf` function obviously returns the +prototype of an object. You can use `Object.create` to create an +object with a specific ((prototype)). + +``` +var protoRabbit = { + speak: function(line) { + console.log("The " + this.type + " rabbit says '" + + line + "'"); + } +}; +var killerRabbit = Object.create(protoRabbit); +killerRabbit.type = "killer"; +killerRabbit.speak("SKREEEE!"); +// → The killer rabbit says 'SKREEEE!' +``` + +{{index "shared property"}} + +The “proto” rabbit acts as a container for the +properties that are shared by all rabbits. An individual rabbit +object, like the killer rabbit, contains properties that apply only to +itself—in this case its type—and derives shared properties from its +prototype. + +{{id constructors}} +## Constructors + +{{index "new operator", this, "return keyword", [object, creation]}} + +A more convenient way to create objects that derive +from some shared prototype is to use a _((constructor))_. In +JavaScript, calling a function with the `new` keyword in front of it +causes it to be treated as a constructor. The constructor will have +its `this` variable bound to a fresh object, and unless it explicitly +returns another object value, this new object will be returned from +the call. + +An object created with `new` is said to be an _((instance))_ of its +constructor. + +{{index "rabbit example", capitalization}} + +Here is a simple constructor +for rabbits. It is a convention to capitalize the names of +constructors so that they are easily distinguished from other +functions. 
+ +{{includeCode "top_lines:6"}} + +``` +function Rabbit(type) { + this.type = type; +} + +var killerRabbit = new Rabbit("killer"); +var blackRabbit = new Rabbit("black"); +console.log(blackRabbit.type); +// → black +``` + +{{index "prototype property", constructor}} + +Constructors (in fact, all +functions) automatically get a property named `prototype`, which by +default holds a plain, empty object that derives from +`Object.prototype`. Every instance created with this constructor will +have this object as its ((prototype)). So to add a `speak` method to +rabbits created with the `Rabbit` constructor, we can simply do this: + +{{includeCode "top_lines:4"}} + +``` +Rabbit.prototype.speak = function(line) { + console.log("The " + this.type + " rabbit says '" + + line + "'"); +}; +blackRabbit.speak("Doom..."); +// → The black rabbit says 'Doom...' +``` + +{{index "prototype property", "getPrototypeOf function"}} + +It is important +to note the distinction between the way a prototype is associated with +a constructor (through its `prototype` property) and the way objects +_have_ a prototype (which can be retrieved with +`Object.getPrototypeOf`). The actual prototype of a constructor is +`Function.prototype` since constructors are functions. Its +`prototype` _property_ will be the prototype of instances created +through it but is not its _own_ prototype. + +## Overriding derived properties + +{{index "shared property", overriding}} + +When you add a ((property)) to an +object, whether it is present in the prototype or not, the property is +added to the object _itself_, which will henceforth have it as its own +property. If there _is_ a property by the same name in the prototype, +this property will no longer affect the object. The prototype itself +is not changed. 
+ +``` +Rabbit.prototype.teeth = "small"; +console.log(killerRabbit.teeth); +// → small +killerRabbit.teeth = "long, sharp, and bloody"; +console.log(killerRabbit.teeth); +// → long, sharp, and bloody +console.log(blackRabbit.teeth); +// → small +console.log(Rabbit.prototype.teeth); +// → small +``` + +{{index [prototype, diagram]}} + +The following diagram sketches the situation +after this code has run. The `Rabbit` and `Object` ((prototype))s lie +behind `killerRabbit` as a kind of backdrop, where properties that are +not found in the object itself can be looked up. + +{{figure {url: "img/rabbits.svg", alt: "Rabbit object prototype schema",width: "8cm"}}} + +{{index "shared property"}} + +Overriding properties that exist in a prototype +is often a useful thing to do. As the rabbit teeth example shows, it +can be used to express exceptional properties in instances of a more +generic class of objects, while letting the nonexceptional objects +simply take a standard value from their prototype. + +{{index "toString method", "Array prototype", "Function prototype"}} + +It +is also used to give the standard function and array prototypes a +different `toString` method than the basic object prototype. + +``` +console.log(Array.prototype.toString == + Object.prototype.toString); +// → false +console.log([1, 2].toString()); +// → 1,2 +``` + +{{index "toString method", "join method", "call method"}} + +Calling +`toString` on an array gives a result similar to calling `.join(",")` +on it—it puts commas between the values in the array. Directly calling +`Object.prototype.toString` with an array produces a different string. +That function doesn't know about arrays, so it simply puts the word +“object” and the name of the type between square brackets. 
+ +``` +console.log(Object.prototype.toString.call([1, 2])); +// → [object Array] +``` + +## Prototype interference + +{{index [prototype, interference], "rabbit example", mutability}} + +A +((prototype)) can be used at any time to add new properties and +methods to all objects based on it. For example, it might become +necessary for our rabbits to dance. + +``` +Rabbit.prototype.dance = function() { + console.log("The " + this.type + " rabbit dances a jig."); +}; +killerRabbit.dance(); +// → The killer rabbit dances a jig. +``` + +{{index map, [object, "as map"]}} + +That's convenient. But there are +situations where it causes problems. In previous chapters, we used an +object as a way to associate values with names by creating properties +for the names and giving them the corresponding value as their value. +Here's an example from [Chapter 4](04_data.html#object_map): + +{{includeCode}} + +``` +var map = {}; +function storePhi(event, phi) { + map[event] = phi; +} + +storePhi("pizza", 0.069); +storePhi("touched tree", -0.081); +``` + +{{index "for/in loop", "in operator"}} + +We can iterate over all phi values +in the object using a `for`/`in` loop and test whether a name is in +there using the regular `in` operator. But unfortunately, the object's +prototype gets in the way. + +``` +Object.prototype.nonsense = "hi"; +for (var name in map) + console.log(name); +// → pizza +// → touched tree +// → nonsense +console.log("nonsense" in map); +// → true +console.log("toString" in map); +// → true + +// Delete the problematic property again +delete Object.prototype.nonsense; +``` + +{{index [prototype, pollution], "toString method"}} + +That's all wrong. There +is no event called “nonsense” in our data set. And there _definitely_ +is no event called “toString”. + +{{index enumerability, "for/in loop", property}} + +Oddly, `toString` +did not show up in the `for`/`in` loop, but the `in` operator did +return true for it. 
This is because JavaScript distinguishes between +_enumerable_ and _nonenumerable_ properties. + +{{index "Object prototype"}} + +All properties that we create by simply +assigning to them are enumerable. The standard properties in +`Object.prototype` are all nonenumerable, which is why they do not +show up in such a `for`/`in` loop. + +{{index "defineProperty function"}} + +It is possible to define our own +nonenumerable properties by using the `Object.defineProperty` +function, which allows us to control the type of property we are +creating. + +``` +Object.defineProperty(Object.prototype, "hiddenNonsense", + {enumerable: false, value: "hi"}); +for (var name in map) + console.log(name); +// → pizza +// → touched tree +console.log(map.hiddenNonsense); +// → hi +``` + +{{index "in operator", map, [object, "as map"], "hasOwnProperty method"}} + +So now the property is there, but it won't show up in a loop. +That's good. But we still have the problem with the regular `in` +operator claiming that the `Object.prototype` properties exist in our +object. For that, we can use the object's `hasOwnProperty` method. + +``` +console.log(map.hasOwnProperty("toString")); +// → false +``` + +{{index [property, own]}} + +This method tells us whether the object _itself_ has +the property, without looking at its prototypes. This is often a more +useful piece of information than what the `in` operator gives us. + +{{index [prototype, pollution], "for/in loop"}} + +When you are worried that +someone (some other code you loaded into your program) might have +messed with the base object prototype, I recommend you write your +`for`/`in` loops like this: + +``` +for (var name in map) { + if (map.hasOwnProperty(name)) { + // ... this is an own property + } +} +``` + +## Prototype-less objects + +{{index map, [object, "as map"], "hasOwnProperty method"}} + +But the +rabbit hole doesn't end there. 
What if someone registered the name +`hasOwnProperty` in our `map` object and set it to the value 42? Now +the call to `map.hasOwnProperty` will try to call the local property, +which holds a number, not a function. + +{{index "Object.create function", [prototype, avoidance]}} + +In such a case, +prototypes just get in the way, and we would actually prefer to have +objects without prototypes. We saw the `Object.create` function, which +allows us to create an object with a specific prototype. You are +allowed to pass `null` as the prototype to create a fresh object with +no prototype. For objects like `map`, where the properties could be +anything, this is exactly what we want. + +``` +var map = Object.create(null); +map["pizza"] = 0.069; +console.log("toString" in map); +// → false +console.log("pizza" in map); +// → true +``` + +{{index "in operator", "for/in loop", "Object prototype"}} + +Much +better! We no longer need the `hasOwnProperty` kludge because all the +properties the object has are its own properties. Now we can safely +use `for`/`in` loops, no matter what people have been doing to +`Object.prototype`. + +## Polymorphism + +{{index "toString method", "String function", polymorphism, overriding}} + +When you call the +`String` function, which converts a value to a string, on an object, +it will call the `toString` method on that object to try to create a +meaningful string to return. I mentioned that some of the standard +prototypes define their own version of `toString` so they can +create a string that contains more useful information than +`"[object Object]"`. + +{{index "object-oriented programming"}} + +This is a simple instance of a +powerful idea. When a piece of code is written to work with objects +that have a certain ((interface))—in this case, a `toString` +method—any kind of object that happens to support this interface can +be plugged into the code, and it will just work. 
+ +This technique is called _polymorphism_—though no actual +shape-shifting is involved. Polymorphic code can work with values of +different shapes, as long as they support the interface it expects. + +{{id tables}} +## Laying out a table + +{{index "MOUNTAINS data set", "table example"}} + +I am going to work through +a slightly more involved example in an attempt to give you a better +idea what ((polymorphism)), as well as ((object-oriented programming)) +in general, looks like. The project is this: we will write a program +that, given an array of arrays of ((table)) cells, builds up a string +that contains a nicely laid out table—meaning that the columns are +straight and the rows are aligned. Something like this: + +```text/plain +name height country +------------ ------ ------------- +Kilimanjaro 5895 Tanzania +Everest 8848 Nepal +Mount Fuji 3776 Japan +Mont Blanc 4808 Italy/France +Vaalserberg 323 Netherlands +Denali 6168 United States +Popocatepetl 5465 Mexico +``` + +The way our table-building system will work is that the builder +function will ask each cell how wide and high it wants to be and then +use this information to determine the width of the columns and the +height of the rows. The builder function will then ask the cells to +draw themselves at the correct size and assemble the results into a +single string. + +{{index "table example"}} + +{{id table_interface}} +The layout program will communicate with the cell +objects through a well-defined ((interface)). That way, the types of +cells that the program supports is not fixed in advance. We can add +new cell styles later—for example, underlined cells for table +headers—and if they support our interface, they will just work, +without requiring changes to the layout program. + +This is the interface: + +* `minHeight()` returns a number indicating the minimum height this + cell requires (in lines). + +* `minWidth()` returns a number indicating this cell's minimum width (in + characters). 
+ +* `draw(width, height)` returns an array of length + `height`, which contains a series of strings that are each `width` characters wide. + This represents the content of the cell. + +{{index [function, "higher-order"]}} + +I'm going to make heavy use of higher-order +array methods in this example since it lends itself well to that +approach. + +{{index "rowHeights function", "colWidths function", maximum, "map method", "reduce method"}} + +The first part of the program computes +arrays of minimum column widths and row heights for a grid of cells. +The `rows` variable will hold an array of arrays, with each inner array +representing a row of cells. + +{{includeCode}} + +``` +function rowHeights(rows) { + return rows.map(function(row) { + return row.reduce(function(max, cell) { + return Math.max(max, cell.minHeight()); + }, 0); + }); +} + +function colWidths(rows) { + return rows[0].map(function(_, i) { + return rows.reduce(function(max, row) { + return Math.max(max, row[i].minWidth()); + }, 0); + }); +} +``` + +{{index "underscore character", "programming style"}} + +Using a variable name +starting with an underscore (_) or consisting entirely of a single +underscore is a way to indicate (to human readers) that this argument +is not going to be used. + +The `rowHeights` function shouldn't be too hard to follow. It uses +`reduce` to compute the maximum height of an array of cells and wraps +that in `map` in order to do it for all rows in the `rows` array. + +{{index "map method", "filter method", "forEach method", [array, indexing], "reduce method"}} + +Things are slightly +harder for the `colWidths` function because the outer array is an +array of rows, not of columns. I have failed to mention so far that +`map` (as well as `forEach`, `filter`, and similar array methods) +passes a second argument to the function it is given: the ((index)) of +the current element. 
By mapping over the elements of the first row and +only using the mapping function's second argument, `colWidths` builds +up an array with one element for every column index. The call to +`reduce` runs over the outer `rows` array for each index and picks +out the width of the widest cell at that index. + +{{index "table example", "drawTable function"}} + +Here's the code to draw a +table: + +{{includeCode}} + +``` +function drawTable(rows) { + var heights = rowHeights(rows); + var widths = colWidths(rows); + + function drawLine(blocks, lineNo) { + return blocks.map(function(block) { + return block[lineNo]; + }).join(" "); + } + + function drawRow(row, rowNum) { + var blocks = row.map(function(cell, colNum) { + return cell.draw(widths[colNum], heights[rowNum]); + }); + return blocks[0].map(function(_, lineNo) { + return drawLine(blocks, lineNo); + }).join("\n"); + } + + return rows.map(drawRow).join("\n"); +} +``` + +{{index "inner function", [nesting, "of functions"]}} + +The `drawTable` function +uses the internal helper function `drawRow` to draw all rows and then +joins them together with newline characters. + +{{index "table example"}} + +The `drawRow` function itself first converts the +cell objects in the row to _blocks_, which are arrays of strings +representing the content of the cells, split by line. A single cell +containing simply the number 3776 might be represented by a +single-element array like `["3776"]`, whereas an underlined cell might +take up two lines and be represented by the array `["name", "----"]`. + +{{index "map method", "join method"}} + +The blocks for a row, which all have +the same height, should appear next to each other in the final output. +The second call to `map` in `drawRow` builds up this output line by +line by mapping over the lines in the leftmost block and, for each of +those, collecting a line that spans the full width of the table. 
These +lines are then joined with newline characters to provide the whole row +as `drawRow`’s return value. + +The function `drawLine` extracts lines that should appear next +to each other from an array of blocks and joins them with a space +character to create a one-character gap between the table's columns. + +{{index "split method", [string, methods], "table example"}} + +{{id split}} +Now +let's write a constructor for cells that contain text, which +implements the ((interface)) for table cells. The constructor splits a +string into an array of lines using the string method `split`, which +cuts up a string at every occurrence of its argument and returns an +array of the pieces. The `minWidth` method finds the maximum line +width in this array. + +{{includeCode}} + +``` +function repeat(string, times) { + var result = ""; + for (var i = 0; i < times; i++) + result += string; + return result; +} + +function TextCell(text) { + this.text = text.split("\n"); +} +TextCell.prototype.minWidth = function() { + return this.text.reduce(function(width, line) { + return Math.max(width, line.length); + }, 0); +}; +TextCell.prototype.minHeight = function() { + return this.text.length; +}; +TextCell.prototype.draw = function(width, height) { + var result = []; + for (var i = 0; i < height; i++) { + var line = this.text[i] || ""; + result.push(line + repeat(" ", width - line.length)); + } + return result; +}; +``` + +{{index "TextCell type"}} + +The code uses a helper function called `repeat`, +which builds a string whose value is the `string` argument repeated +`times` number of times. The `draw` method uses it to add “padding” to +lines so that they all have the required length. + +Let's try everything we've written so far by building up a 5 × 5 +checkerboard. 
+ +``` +var rows = []; +for (var i = 0; i < 5; i++) { + var row = []; + for (var j = 0; j < 5; j++) { + if ((j + i) % 2 == 0) + row.push(new TextCell("##")); + else + row.push(new TextCell(" ")); + } + rows.push(row); +} +console.log(drawTable(rows)); +// → ## ## ## +// ## ## +// ## ## ## +// ## ## +// ## ## ## +``` + +It works! But since all cells have the same size, the table-layout +code doesn't really do anything interesting. + +{{index "data set", "MOUNTAINS data set"}} + +{{id mountains}} +The source data for the table of +mountains that we are trying to build is available in the `MOUNTAINS` +variable in the ((sandbox)) and also +[downloadable](http://eloquentjavascript.net/code/mountains.js) from the +website(!book ([_eloquentjavascript.net/code#6_](http://eloquentjavascript.net/code#6))!). + +{{index "table example"}} + +We will want to highlight the top row, which +contains the column names, by underlining the cells with a series of +dash characters. No problem—we simply write a cell type that handles +underlining. + +{{includeCode}} + +``` +function UnderlinedCell(inner) { + this.inner = inner; +} +UnderlinedCell.prototype.minWidth = function() { + return this.inner.minWidth(); +}; +UnderlinedCell.prototype.minHeight = function() { + return this.inner.minHeight() + 1; +}; +UnderlinedCell.prototype.draw = function(width, height) { + return this.inner.draw(width, height - 1) + .concat([repeat("-", width)]); +}; +``` + +{{index "UnderlinedCell type"}} + +An underlined cell _contains_ another cell. +It reports its minimum size as being the same as that of its inner +cell (by calling through to that cell's `minWidth` and `minHeight` +methods) but adds one to the height to account for the space taken +up by the underline. + +{{index "concat method", concatenation}} + +Drawing such a cell is quite +simple—we take the content of the inner cell and concatenate a single +line full of dashes to it. 
+ +{{index "dataTable function"}} + +Having an underlining mechanism, we can now +write a function that builds up a grid of cells from our data set. + +{{test wrap, trailing}} + +``` +function dataTable(data) { + var keys = Object.keys(data[0]); + var headers = keys.map(function(name) { + return new UnderlinedCell(new TextCell(name)); + }); + var body = data.map(function(row) { + return keys.map(function(name) { + return new TextCell(String(row[name])); + }); + }); + return [headers].concat(body); +} + +console.log(drawTable(dataTable(MOUNTAINS))); +// → name height country +// ------------ ------ ------------- +// Kilimanjaro 5895 Tanzania +// … etcetera +``` + +{{index "Object.keys function", property, "for/in loop"}} + +{{id keys}} +The standard +`Object.keys` function returns an array of property names in an +object. The top row of the table must contain underlined cells that +give the names of the columns. Below that, the values of all the +objects in the data set appear as normal cells—we extract them by +mapping over the `keys` array so that we are sure that the order of +the cells is the same in every row. + +{{index "right-aligning"}} + +The resulting table resembles the example shown +before, except that it does not right-align the numbers in the +`height` column. We will get to that in a moment. + +## Getters and setters + +{{index getter, setter, property}} + +When specifying an interface, it +is possible to include properties that are not methods. We could have +defined `minHeight` and `minWidth` to simply hold numbers. But that'd +have required us to compute them in the ((constructor)), which adds +code there that isn't strictly relevant to _constructing_ the object. +It would cause problems if, for example, the inner cell of an +underlined cell was changed, at which point the size of the underlined +cell should also change. 
+ +{{index "programming style"}} + +This has led some people to adopt a principle +of never including nonmethod properties in interfaces. Rather than +directly access a simple value property, they'd use `getSomething` and +`setSomething` methods to read and write the property. This approach +has the downside that you will end up writing—and reading—a lot of +additional methods. + +Fortunately, JavaScript provides a technique that gets us the best of +both worlds. We can specify properties that, from the outside, look +like normal properties but secretly have ((method))s associated with +them. + +``` +var pile = { + elements: ["eggshell", "orange peel", "worm"], + get height() { + return this.elements.length; + }, + set height(value) { + console.log("Ignoring attempt to set height to", value); + } +}; + +console.log(pile.height); +// → 3 +pile.height = 100; +// → Ignoring attempt to set height to 100 +``` + +{{index "defineProperty function", "{} (object)", getter, setter}} + +In an object literal, the `get` or +`set` notation for properties allows you to specify a function to be +run when the property is read or written. You can also add such a +property to an existing object, for example a prototype, using the +`Object.defineProperty` function (which we previously used to create +nonenumerable properties). + +``` +Object.defineProperty(TextCell.prototype, "heightProp", { + get: function() { return this.text.length; } +}); + +var cell = new TextCell("no\nway"); +console.log(cell.heightProp); +// → 2 +cell.heightProp = 100; +console.log(cell.heightProp); +// → 2 +``` + +You can use a similar `set` property, in the object passed to +`defineProperty`, to specify a setter method. When a getter but no +setter is defined, writing to the property is simply ignored. + +## Inheritance + +{{index inheritance, "table example", alignment, "TextCell type"}} + +We are not quite done yet with our table layout exercise. It +helps readability to right-align columns of numbers. 
We should create +another cell type that is like `TextCell`, but rather than padding the +lines on the right side, it pads them on the left side so that they +align to the right. + +{{index "RTextCell type"}} + +We could simply write a whole new ((constructor)) +with all three methods in its prototype. But prototypes may themselves +have prototypes, and this allows us to do something clever. + +{{includeCode}} + +``` +function RTextCell(text) { + TextCell.call(this, text); +} +RTextCell.prototype = Object.create(TextCell.prototype); +RTextCell.prototype.draw = function(width, height) { + var result = []; + for (var i = 0; i < height; i++) { + var line = this.text[i] || ""; + result.push(repeat(" ", width - line.length) + line); + } + return result; +}; +``` + +{{index "shared property", overriding, interface}} + +We reuse the +constructor and the `minHeight` and `minWidth` methods from the +regular `TextCell`. An `RTextCell` is now basically equivalent to a +`TextCell`, except that its `draw` method contains a different +function. + +{{index "call method"}} + +This pattern is called _((inheritance))_. It allows +us to build slightly different data types from existing data types with +relatively little work. Typically, the new constructor will call the +old ((constructor)) (using the `call` method in order to be able to +give it the new object as its `this` value). Once this constructor has +been called, we can assume that all the fields that the old object +type is supposed to contain have been added. We arrange for the +constructor's ((prototype)) to derive from the old prototype so that +instances of this type will also have access to the properties in that +prototype. Finally, we can override some of these properties by adding +them to our new prototype. + +{{index "dataTable function"}} + +Now, if we slightly adjust the `dataTable` +function to use `RTextCell`s for cells whose value is a number, we +get the table we were aiming for. 
+ +{{startCode "bottom_lines: 1"}} +{{includeCode "strip_log"}} + +``` +function dataTable(data) { + var keys = Object.keys(data[0]); + var headers = keys.map(function(name) { + return new UnderlinedCell(new TextCell(name)); + }); + var body = data.map(function(row) { + return keys.map(function(name) { + var value = row[name]; + // This was changed: + if (typeof value == "number") + return new RTextCell(String(value)); + else + return new TextCell(String(value)); + }); + }); + return [headers].concat(body); +} + +console.log(drawTable(dataTable(MOUNTAINS))); +// → … beautifully aligned table +``` + +{{index "object-oriented programming"}} + +Inheritance is a fundamental part of +the object-oriented tradition, alongside encapsulation and +polymorphism. But while the latter two are now generally regarded as +wonderful ideas, inheritance is somewhat controversial. + +{{index complexity}} + +The main reason for this is that it is often confused +with ((polymorphism)), sold as a more powerful tool than it really +is, and subsequently overused in all kinds of ugly ways. Whereas +((encapsulation)) and polymorphism can be used to _separate_ pieces of +code from each other, reducing the tangledness of the overall program, +((inheritance)) fundamentally ties types together, creating _more_ +tangle. + +{{index "code structure", "programming style"}} + +You can have +polymorphism without inheritance, as we saw. I am not going to tell +you to avoid inheritance entirely—I use it regularly in my own +programs. But you should see it as a slightly dodgy trick that can help you +define new types with little code, not as a grand principle of code +organization. A preferable way to extend types is through +((composition)), such as how `UnderlinedCell` builds on another cell +object by simply storing it in a property and forwarding method calls +to it in its own ((method))s. 
+ +## The instanceof operator + +{{index type, "instanceof operator", constructor, object}} + +It is occasionally useful to know whether an object was derived +from a specific constructor. For this, JavaScript provides a binary +operator called `instanceof`. + +``` +console.log(new RTextCell("A") instanceof RTextCell); +// → true +console.log(new RTextCell("A") instanceof TextCell); +// → true +console.log(new TextCell("A") instanceof RTextCell); +// → false +console.log([1] instanceof Array); +// → true +``` + +{{index inheritance}} + +The operator will see through inherited types. +An `RTextCell` is an instance of `TextCell` because +`RTextCell.prototype` derives from `TextCell.prototype`. The operator +can be applied to standard constructors like `Array`. Almost every +object is an instance of `Object`. + +## Summary + +So objects are more complicated than I initially portrayed them. They +have prototypes, which are other objects, and will act as if they have +properties they don't have as long as the prototype has that property. +Simple objects have `Object.prototype` as their prototype. + +Constructors, which are functions whose names usually start with a +capital letter, can be used with the `new` operator to create new +objects. The new object's prototype will be the object found in the +`prototype` property of the constructor function. You can make good +use of this by putting the properties that all values of a given type +share into their prototype. The `instanceof` operator can, given an +object and a constructor, tell you whether that object is an instance +of that constructor. + +One useful thing to do with objects is to specify an interface for +them and tell everybody that they are supposed to talk to your +object only through that interface. The rest of the details that make up +your object are now _encapsulated_, hidden behind the interface. 
+ +Once you are talking in terms of interfaces, who says that only one +kind of object may implement this interface? Having different objects +expose the same interface and then writing code that works on any +object with the interface is called _polymorphism_. It is very +useful. + +When implementing multiple types that differ in only some details, it +can be helpful to simply make the prototype of your new type derive +from the prototype of your old type and have your new constructor +call the old one. This gives you an object type similar to the +old type but for which you can add and override properties as you see +fit. + +## Exercises + +{{id exercise_vector}} +### A vector type + +{{index dimensions, "Vector type", coordinates, "vector (exercise)"}} + +Write a +((constructor)) `Vector` that represents a vector in two-dimensional +space. It takes `x` and `y` parameters (numbers), which it should save +to properties of the same name. + +{{index addition, subtraction}} + +Give the `Vector` prototype two +methods, `plus` and `minus`, that take another vector as a parameter +and return a new vector that has the sum or difference of the two +vectors’ (the one in `this` and the parameter) _x_ and _y_ values. + +Add a ((getter)) property `length` to the prototype that computes the +length of the vector—that is, the distance of the point (_x_, _y_) from +the origin (0, 0). + +{{if interactive + +{{test no}} + +``` +// Your code here. + +console.log(new Vector(1, 2).plus(new Vector(2, 3))); +// → Vector{x: 3, y: 5} +console.log(new Vector(1, 2).minus(new Vector(2, 3))); +// → Vector{x: -1, y: -1} +console.log(new Vector(3, 4).length); +// → 5 +``` +if}} + +{{hint + +{{index "vector (exercise)"}} + +Your solution can follow the pattern of the +`Rabbit` constructor from this chapter quite closely. 
+ +{{index Pythagoras, "defineProperty function", "square root", "Math.sqrt function"}} + +Adding a getter property to the +constructor's prototype can be done with the `Object.defineProperty` function. To +compute the distance from (0, 0) to (x, y), you can use the +Pythagorean theorem, which says that the square of the distance we are +looking for is equal to the square of the x-coordinate plus the square +of the y-coordinate. Thus, (!html √(x^2^ + y^2^pass:[)]!)(!tex pass:[$\sqrt{x^2 + y^2}$]!) +is the number you want, and `Math.sqrt` is the way you compute a square +root in JavaScript. + +hint}} + +### Another cell + +{{index "StretchCell (exercise)", interface}} + +Implement a cell type named +`StretchCell(inner, width, height)` that conforms to the +[table cell interface](06_object.html#table_interface) described +earlier in the chapter. It should wrap another cell (like +`UnderlinedCell` does) and ensure that the resulting cell has at +least the given `width` and `height`, even if the inner cell would +naturally be smaller. + +{{if interactive + +{{test no}} + +``` +// Your code here. + +var sc = new StretchCell(new TextCell("abc"), 1, 2); +console.log(sc.minWidth()); +// → 3 +console.log(sc.minHeight()); +// → 2 +console.log(sc.draw(3, 2)); +// → ["abc", " "] +``` + +if}} + +{{hint + +{{index "StretchCell (exercise)"}} + +You'll have to store all three constructor +arguments in the instance object. The `minWidth` and `minHeight` +methods should call through to the corresponding methods in the +`inner` cell but ensure that no number less than the given size is +returned (possibly using `Math.max`). + +Don't forget to add a `draw` method that simply forwards the call to +the inner cell. + +hint}} + +### Sequence interface + +{{index "sequence (exercise)"}} + +Design an _((interface))_ that abstracts +((iteration)) over a ((collection)) of values. 
An object that provides +this interface represents a sequence, and the interface must somehow +make it possible for code that uses such an object to iterate over the +sequence, looking at the element values it is made up of and having +some way to find out when the end of the sequence is reached. + +When you have specified your interface, try to write a function +`logFive` that takes a sequence object and calls `console.log` on its +first five elements—or fewer, if the sequence has fewer than five +elements. + +Then implement an object type `ArraySeq` that wraps an array and +allows iteration over the array using the interface you designed. +Implement another object type `RangeSeq` that iterates over a range of +integers (taking `from` and `to` arguments to its constructor) +instead. + +{{if interactive + +{{test no}} + +``` +// Your code here. + +logFive(new ArraySeq([1, 2])); +// → 1 +// → 2 +logFive(new RangeSeq(100, 1000)); +// → 100 +// → 101 +// → 102 +// → 103 +// → 104 +``` + +if}} + +{{hint + +{{index "sequence (exercise)", collection}} + +One way to solve this is to +give the sequence objects _((state))_, meaning their properties are +changed in the process of using them. You could store a counter that +indicates how far the sequence object has advanced. + +Your ((interface)) will need to expose at least a way to get the next +element and to find out whether the iteration has reached the end of +the sequence yet. It is tempting to roll these into one method, +`next`, which returns `null` or `undefined` when the sequence is at +its end. But now you have a problem when a sequence actually contains +`null`. So a separate method (or getter property) to find out whether +the end has been reached is probably preferable. + +{{index mutation, "pure function", efficiency}} + +Another solution is +to avoid changing state in the object. 
You can expose a method for +getting the current element (without advancing any counter) and +another for getting a new sequence that represents the remaining +elements after the current one (or a special value if the end of the +sequence is reached). This is quite elegant—a sequence value will +“stay itself” even after it is used and can thus be shared with other +code without worrying about what might happen to it. It is, +unfortunately, also somewhat inefficient in a language like +JavaScript because it involves creating a lot of objects during +iteration. + +hint}} + diff --git a/07_elife.md b/07_elife.md new file mode 100644 index 000000000..a84504ef3 --- /dev/null +++ b/07_elife.md @@ -0,0 +1,1330 @@ +{{meta {chap_num: 7, prev_link: 06_object, next_link: 08_error, load_files: ["code/chapter/07_elife.js", "code/animateworld.js"], zip: html}}} + +# Project: Electronic Life + +{{quote {author: "Edsger Dijkstra", title: "The Threats to Computing Science", chapter: true} + +[...] the question of whether Machines Can Think [...] is about as +relevant as the question of whether Submarines Can Swim. + +quote}} + +{{index "artificial intelligence", "Dijkstra, Edsger", "project chapter", "reading code", "writing code"}} + +In “project” chapters, +I'll stop pummeling you with new theory for a brief moment and +instead work through a program with you. Theory is indispensable when +learning to program, but it should be accompanied by reading and +understanding nontrivial programs. + +{{index "artificial life", "electronic life", ecosystem}} + +Our +project in this chapter is to build a virtual ecosystem, a little +world populated with ((critter))s that move around and struggle for +survival. + +## Definition + +{{index dimensions, "electronic life"}} + +To make this +task manageable, we will radically simplify the concept of a +_((world))_. Namely, a world will be a two-dimensional ((grid)) where +each entity takes up one full square of the grid. 
On every _((turn))_, +the critters all get a chance to take some action. + +{{index discretization, simulation}} + +Thus, we chop both time and space +into units with a fixed size: squares for space and turns for time. Of +course, this is a somewhat crude and inaccurate ((approximation)). But +our simulation is intended to be amusing, not accurate, so we can +freely cut such corners. + +{{index array}} + +{{id plan}} +We can define a world with a _plan_, an array of +strings that lays out the world's grid using one character per square. + +// include_code + +``` +var plan = ["############################", + "# # # o ##", + "# #", + "# ##### #", + "## # # ## #", + "### ## # #", + "# ### # #", + "# #### #", + "# ## o #", + "# o # o ### #", + "# # #", + "############################"]; +``` + +The “#” characters in this plan represent ((wall))s and rocks, and the +“o” characters represent critters. The spaces, as you might have +guessed, are empty space. + +{{index object, "toString method", turn}} + +A plan array can be +used to create a ((world)) object. Such an object keeps track of the +size and content of the world. It has a `toString` method, which +converts the world back to a printable string (similar to the plan it +was based on) so that we can see what's going on inside. The world +object also has a `turn` method, which allows all the critters in it to +take one turn and updates the world to reflect their actions. + +{{id grid}} +## Representing space + +{{index [array, "as grid"], "Vector type", coordinates}} + +The ((grid)) +that models the world has a fixed width and height. Squares are +identified by their x- and y-coordinates. We use a simple type, +`Vector` (as seen in the exercises for the +[previous chapter](06_object.html#exercise_vector)), to represent +these coordinate pairs. 
+ +// include_code + +``` +function Vector(x, y) { + this.x = x; + this.y = y; +} +Vector.prototype.plus = function(other) { + return new Vector(this.x + other.x, this.y + other.y); +}; +``` + +{{index object, encapsulation}} + +Next, we need an object type that +models the grid itself. A grid is part of a world, but we are making +it a separate object (which will be a property of a ((world)) object) +to keep the world object itself simple. The world should concern +itself with world-related things, and the grid should concern itself with grid-related things. + +{{index array, "data structure"}} + +To store a grid of values, we have +several options. We can use an array of row arrays and use two +property accesses to get to a specific square, like this: + +``` +var grid = [["top left", "top middle", "top right"], + ["bottom left", "bottom middle", "bottom right"]]; +console.log(grid[1][2]); +// → bottom right +``` + +{{index [array, indexing], coordinates, grid}} + +Or we can use a +single array, with size width × height, and decide that the element at +(_x_,_y_) is found at position _x_ + (_y_ × width) in the array. + +``` +var grid = ["top left", "top middle", "top right", + "bottom left", "bottom middle", "bottom right"]; +console.log(grid[2 + (1 * 3)]); +// → bottom right +``` + +{{index encapsulation, abstraction, "Array constructor", [array, creation], [array, "length of"]}} + +Since the actual access to this array will be wrapped in methods +on the grid object type, it doesn't matter to outside code which +approach we take. I chose the second representation because it makes +it much easier to create the array. When calling the `Array` +constructor with a single number as an argument, it creates a new empty +array of the given length. 
+ +{{index "Grid type"}} + +This code defines the `Grid` object, with some basic +methods: + +// include_code + +``` +function Grid(width, height) { + this.space = new Array(width * height); + this.width = width; + this.height = height; +} +Grid.prototype.isInside = function(vector) { + return vector.x >= 0 && vector.x < this.width && + vector.y >= 0 && vector.y < this.height; +}; +Grid.prototype.get = function(vector) { + return this.space[vector.x + this.width * vector.y]; +}; +Grid.prototype.set = function(vector, value) { + this.space[vector.x + this.width * vector.y] = value; +}; +``` + +And here is a trivial test: + +``` +var grid = new Grid(5, 5); +console.log(grid.get(new Vector(1, 1))); +// → undefined +grid.set(new Vector(1, 1), "X"); +console.log(grid.get(new Vector(1, 1))); +// → X +``` + +## A critter's programming interface + +{{index record, "electronic life", interface}} + +Before we can +start on the `World` ((constructor)), we must get more specific about +the ((critter)) objects that will be living inside it. I mentioned +that the world will ask the critters what actions they want to take. +This works as follows: each critter object has an `act` ((method)) +that, when called, returns an _action_. An action is an object with a +`type` property, which names the type of action the critter wants to +take, for example `"move"`. The action may also contain extra +information, such as the direction the critter wants to move in. + +{{index "Vector type", "View type", "directions object", [object, "as map"]}} + +{{id directions}} +Critters are terribly myopic and can see only the +squares directly around them on the grid. But even this limited vision +can be useful when deciding which action to take. When the `act` +method is called, it is given a _view_ object that allows the critter +to inspect its surroundings. We name the eight surrounding squares by +their ((compass direction))s: `"n"` for north, `"ne"` for northeast, +and so on. 
Here's the object we will use to map from direction names +to coordinate offsets: + +// include_code + +``` +var directions = { + "n": new Vector( 0, -1), + "ne": new Vector( 1, -1), + "e": new Vector( 1, 0), + "se": new Vector( 1, 1), + "s": new Vector( 0, 1), + "sw": new Vector(-1, 1), + "w": new Vector(-1, 0), + "nw": new Vector(-1, -1) +}; +``` + +{{index "View type"}} + +The view object has a method `look`, which takes a +direction and returns a character, for example `"#"` when there is a +wall in that direction, or `" "` (space) when there is nothing there. +The object also provides the convenient methods `find` and `findAll`. +Both take a map character as an argument. The first returns a direction +in which the character can be found next to the critter or returns `null` if +no such direction exists. The second returns an array containing all +directions with that character. For example, a creature sitting left +(west) of a wall will get `["ne", "e", "se"]` when calling `findAll` +on its view object with the `"#"` character as argument. + +{{index bouncing, behavior, "BouncingCritter type"}} + +Here is a +simple, stupid critter that just follows its nose until it hits an +obstacle and then bounces off in a random open direction: + +// include_code + +``` +function randomElement(array) { + return array[Math.floor(Math.random() * array.length)]; +} + +var directionNames = "n ne e se s sw w nw".split(" "); + +function BouncingCritter() { + this.direction = randomElement(directionNames); +}; + +BouncingCritter.prototype.act = function(view) { + if (view.look(this.direction) != " ") + this.direction = view.find(" ") || "s"; + return {type: "move", direction: this.direction}; +}; +``` + +{{index "random number", "Math.random function", "randomElement function", [array, indexing]}} + +The `randomElement` helper +function simply picks a random element from an array, using +`Math.random` plus some arithmetic to get a random index. 
We'll use +this again later because randomness can be useful in ((simulation))s. + +{{index "Object.keys function"}} + +To pick a random direction, the +`BouncingCritter` constructor calls `randomElement` on an array of +direction names. We could also have used `Object.keys` to get this +array from the `directions` object we defined +[earlier](07_elife.html#directions), but that provides no +guarantees about the order in which the properties are listed. In most +situations, modern JavaScript engines will return properties in the +order they were defined, but they are not required to. + +{{index "|| operator", null}} + +The “_|| "s"_” in the `act` method is +there to prevent `this.direction` from getting the value `null` if the +critter is somehow trapped with no empty space around it (for example +when crowded into a corner by other critters). + +## The world object + +{{index "World type", "electronic life"}} + +Now we can start on the +`World` object type. The ((constructor)) takes a plan (the array of +strings representing the world's grid, described +[earlier](07_elife.html#grid)) and a _((legend))_ as arguments. A +legend is an object that tells us what each character in the map +means. It contains a constructor for every character—except for the +space character, which always refers to `null`, the value we'll use to +represent empty space. 
+ +// include_code + +``` +function elementFromChar(legend, ch) { + if (ch == " ") + return null; + var element = new legend[ch](); + element.originChar = ch; + return element; +} + +function World(map, legend) { + var grid = new Grid(map[0].length, map.length); + this.grid = grid; + this.legend = legend; + + map.forEach(function(line, y) { + for (var x = 0; x < line.length; x++) + grid.set(new Vector(x, y), + elementFromChar(legend, line[x])); + }); +} +``` + +{{index "elementFromChar function", [object, "as map"]}} + +In `elementFromChar`, +first we create an instance of the right type by looking up the +character's constructor and applying `new` to it. Then we add an +`originChar` ((property)) to it to make it easy to find out what +character the element was originally created from. + +{{index "toString method", [nesting, "of loops"], "for loop", coordinates}} + +We need this `originChar` property when +implementing the world's `toString` method. This method builds up a +maplike string from the world's current state by performing a +two-dimensional loop over the squares on the grid. + +// include_code + +``` +function charFromElement(element) { + if (element == null) + return " "; + else + return element.originChar; +} + +World.prototype.toString = function() { + var output = ""; + for (var y = 0; y < this.grid.height; y++) { + for (var x = 0; x < this.grid.width; x++) { + var element = this.grid.get(new Vector(x, y)); + output += charFromElement(element); + } + output += "\n"; + } + return output; +}; +``` + +{{index "electronic life", constructor, "Wall type"}} + +A ((wall)) is +a simple object—it is used only for taking up space and has no +`act` method. + +// include_code + +``` +function Wall() {} +``` + +{{index "World type"}} + +When we try the `World` object by creating an +instance based on the plan from [earlier in the +chapter](07_elife.html#plan) and then calling `toString` on it, we get a string very +similar to the plan we put in. 
+ +{{includeCode "strip_log"}} +{{test trim}} + +``` +var world = new World(plan, {"#": Wall, + "o": BouncingCritter}); +console.log(world.toString()); +// → ############################ +// # # # o ## +// # # +// # ##### # +// ## # # ## # +// ### ## # # +// # ### # # +// # #### # +// # ## o # +// # o # o ### # +// # # # +// ############################ +``` + +## this and its scope + +{{index "forEach method", [function, scope], this, scope, "self variable", "global object"}} + +The `World` ((constructor)) contains a +call to `forEach`. One interesting thing to note is that inside the +function passed to `forEach`, we are no longer directly in the +function scope of the constructor. Each function call gets its own +`this` binding, so the `this` in the inner function does _not_ +refer to the newly constructed object that the outer `this` refers to. +In fact, when a function isn't called as a method, `this` will refer +to the global object. + +This means that we can't write `this.grid` to access the grid from +inside the ((loop)). Instead, the outer function creates a normal +local variable, `grid`, through which the inner function gets access +to the grid. + +{{index future, "ECMAScript 6", "arrow function", "self variable"}} + +This is a bit of a design blunder in JavaScript. +Fortunately, the next version of the language provides a solution for +this problem. Meanwhile, there are workarounds. A common pattern is to +say `var self = this` and from then on refer to `self`, which is a +normal variable and thus visible to inner functions. + +{{index "bind method", this}} + +Another solution is to use the `bind` +method, which allows us to provide an explicit `this` object to bind +to. 
+ +``` +var test = { + prop: 10, + addPropTo: function(array) { + return array.map(function(elt) { + return this.prop + elt; + }.bind(this)); + } +}; +console.log(test.addPropTo([5])); +// → [15] +``` + +{{index "map method"}} + +The function passed to `map` is the result of the +`bind` call and thus has its `this` bound to the first argument given +to _bind_—the outer function's `this` value (which holds the `test` +object). + +{{index "context parameter", [function, "higher-order"]}} + +Most ((standard)) +higher-order methods on arrays, such as `forEach` and `map`, take an +optional second argument that can also be used to provide a `this` for +the calls to the iteration function. So you could express the previous example +in a slightly simpler way. + +``` +var test = { + prop: 10, + addPropTo: function(array) { + return array.map(function(elt) { + return this.prop + elt; + }, this); // ← no bind + } +}; +console.log(test.addPropTo([5])); +// → [15] +``` + +This works only for higher-order functions that +support such a _context_ parameter. When they don't, you'll need to +use one of the other approaches. + +{{index "context parameter", [function, "higher-order"], "call method"}} + +In +our own higher-order functions, we can support such a context +parameter by using the `call` method to call the function given as an +argument. For example, here is a `forEach` method for our `Grid` type, +which calls a given function for each element in the grid that isn't +null or undefined: + +// include_code + +``` +Grid.prototype.forEach = function(f, context) { + for (var y = 0; y < this.height; y++) { + for (var x = 0; x < this.width; x++) { + var value = this.space[x + y * this.width]; + if (value != null) + f.call(context, value, new Vector(x, y)); + } + } +}; +``` + +## Animating life + +{{index simulation, "electronic life", "World type"}} + +The next +step is to write a `turn` method for the world object that gives the +((critter))s a chance to act. 
It will go over the grid using the +`forEach` method we just defined, looking for objects with an `act` +method. When it finds one, `turn` calls that method to get an action +object and carries out the action when it is valid. For now, only +`"move"` actions are understood. + +{{index grid}} + +There is one potential problem with this approach. Can you +spot it? If we let critters move as we come across them, they may move +to a square that we haven't looked at yet, and we'll allow them to +move _again_ when we reach that square. Thus, we have to keep an array +of critters that have already had their turn and ignore them when we +see them again. + +// include_code + +``` +World.prototype.turn = function() { + var acted = []; + this.grid.forEach(function(critter, vector) { + if (critter.act && acted.indexOf(critter) == -1) { + acted.push(critter); + this.letAct(critter, vector); + } + }, this); +}; +``` + +{{index this}} + +We use the second parameter to the grid's `forEach` method +to be able to access the correct `this` inside the inner function. +The `letAct` method contains the actual logic that allows the critters +to move. + +// include_code + +{{id checkDestination}} +``` +World.prototype.letAct = function(critter, vector) { + var action = critter.act(new View(this, vector)); + if (action && action.type == "move") { + var dest = this.checkDestination(action, vector); + if (dest && this.grid.get(dest) == null) { + this.grid.set(vector, null); + this.grid.set(dest, critter); + } + } +}; + +World.prototype.checkDestination = function(action, vector) { + if (directions.hasOwnProperty(action.direction)) { + var dest = vector.plus(directions[action.direction]); + if (this.grid.isInside(dest)) + return dest; + } +}; +``` + +{{index "View type", "electronic life"}} + +First, we simply ask the +critter to act, passing it a view object that knows about the world +and the critter's current position in that world (we'll define `View` +in a [moment](07_elife.html#view)). 
The `act` method returns an +action of some kind. + +If the action's `type` is not `"move"`, it is ignored. If it _is_ +`"move"`, if it has a `direction` property that refers to a valid +direction, _and_ if the square in that direction is empty (null), we set +the square where the critter used to be to hold null and store the +critter in the destination square. + +{{index "error tolerance", "defensive programming", "sloppy programming", validation}} + +Note that `letAct` takes care to ignore +nonsense ((input))—it doesn't assume that the action's `direction` +property is valid or that the `type` property makes sense. This kind +of _defensive_ programming makes sense in some situations. The main +reason for doing it is to validate inputs coming from sources you +don't control (such as user or file input), but it can also be useful +to isolate subsystems from each other. In this case, the intention is +that the critters themselves can be programmed sloppily—they don't +have to verify if their intended actions make sense. They can just +request an action, and the world will figure out whether to allow it. + +{{index interface, "private property", "access control", [property, naming], "underscore character", "World type"}} + +These two methods are not part of the external interface of a +`World` object. They are an internal detail. Some languages provide +ways to explicitly declare certain methods and properties _private_ +and signal an error when you try to use them from outside the object. +JavaScript does not, so you will have to rely on some other form of +communication to describe what is part of an object's interface. +Sometimes it can help to use a naming scheme to distinguish between +external and internal properties, for example by prefixing all +internal ones with an underscore character (_). This will make +accidental uses of properties that are not part of an object's +interface easier to spot. 
+ +{{index "View type"}} + +{{id view}} +The one missing part, the `View` type, looks like this: + +// include_code + +``` +function View(world, vector) { + this.world = world; + this.vector = vector; +} +View.prototype.look = function(dir) { + var target = this.vector.plus(directions[dir]); + if (this.world.grid.isInside(target)) + return charFromElement(this.world.grid.get(target)); + else + return "#"; +}; +View.prototype.findAll = function(ch) { + var found = []; + for (var dir in directions) + if (this.look(dir) == ch) + found.push(dir); + return found; +}; +View.prototype.find = function(ch) { + var found = this.findAll(ch); + if (found.length == 0) return null; + return randomElement(found); +}; +``` + +{{index "defensive programming"}} + +The `look` method figures out the +coordinates that we are trying to look at and, if they are inside the +((grid)), finds the character corresponding to the element that sits +there. For coordinates outside the grid, `look` simply pretends that +there is a wall there so that if you define a world that isn't walled +in, the critters still won't be tempted to try to walk off the edges. + +## It moves + +{{index "electronic life", simulation}} + +We instantiated a world +object earlier. Now that we've added all the necessary methods, it +should be possible to actually make the world move. + +``` +for (var i = 0; i < 5; i++) { + world.turn(); + console.log(world.toString()); +} +// → … five turns of moving critters +``` + +{{if book + +The first two maps that are displayed will look something like this +(depending on the random direction the critters picked): + +```null +############################ ############################ +# # # ## # # # ## +# o # # # +# ##### # # ##### o # +## # # ## # ## # # ## # +### ## # # ### ## # # +# ### # # # ### # # +# #### # # #### # +# ## # # ## # +# # o ### # #o # ### # +#o # o # # # o o # +############################ ############################ +``` + +{{index animation}} + +They move! 
To get a more interactive view of these +critters crawling around and bouncing off the walls, open this chapter +in the online version of the book at +[_eloquentjavascript.net_](http://eloquentjavascript.net). + +if}} + +{{if interactive + +Simply printing out many copies of the map is a rather unpleasant +way to observe a world, though. That's why the sandbox provides an +`animateWorld` function that will run a world as an onscreen +animation, moving three turns per second, until you hit the stop +button. + +{{test no}} + +``` +animateWorld(world); +// → … life! +``` + +The implementation of `animateWorld` will remain a mystery for now, +but after you've read the [later chapters](13_dom.html#dom) of this +book, which discuss JavaScript integration in web browsers, it won't +look so magical anymore. + +if}} + +## More life forms + +The dramatic highlight of our world, if you watch for a bit, is when +two critters bounce off each other. Can you think of another +interesting form of ((behavior))? + +{{index "wall following"}} + +The one I came up with is a ((critter)) that moves +along walls. Conceptually, the critter keeps its left hand (paw, +tentacle, whatever) to the wall and follows along. This turns out to +be not entirely trivial to implement. + +{{index "WallFollower type", "directions object"}} + +We need to be +able to “compute” with ((compass direction))s. Since directions are +modeled by a set of strings, we need to define our own operation +(`dirPlus`) to calculate relative directions. So `dirPlus("n", 1)` +means one 45-degree turn clockwise from north, giving `"ne"`. +Similarly, `dirPlus("s", -2)` means 90 degrees counterclockwise from +south, which is east. 
+ +// include_code + +``` +function dirPlus(dir, n) { + var index = directionNames.indexOf(dir); + return directionNames[(index + n + 8) % 8]; +} + +function WallFollower() { + this.dir = "s"; +} + +WallFollower.prototype.act = function(view) { + var start = this.dir; + if (view.look(dirPlus(this.dir, -3)) != " ") + start = this.dir = dirPlus(this.dir, -2); + while (view.look(this.dir) != " ") { + this.dir = dirPlus(this.dir, 1); + if (this.dir == start) break; + } + return {type: "move", direction: this.dir}; +}; +``` + +{{index "artificial intelligence", pathfinding, "View type"}} + +The `act` +method only has to “scan” the critter's surroundings, starting from +its left side and going clockwise until it finds an empty square. +It then moves in the direction of that empty square. + +What complicates things is that a critter may end up in the middle of +empty space, either as its start position or as a result of walking +around another critter. If we apply the approach I just described in +empty space, the poor critter will just keep on turning left at every +step, running in circles. + +So there is an extra check (the `if` statement) to start scanning to +the left only if it looks like the critter has just passed some kind +of ((obstacle))—that is, if the space behind and to the left of the +critter is not empty. Otherwise, the critter starts scanning directly +ahead, so that it'll walk straight when in empty space. + +{{index "infinite loop"}} + +And finally, there's a test comparing `this.dir` to +`start` after every pass through the loop to make sure that the loop +won't run forever when the critter is walled in or crowded in by other +critters and can't find an empty square. 
+ +{{if interactive + +This small world demonstrates the wall-following creatures: + +{{test no}} + +``` +animateWorld(new World( + ["############", + "# # #", + "# ~ ~ #", + "# ## #", + "# ## o####", + "# #", + "############"], + {"#": Wall, + "~": WallFollower, + "o": BouncingCritter} +)); +``` + +if}} + +## A more lifelike simulation + +{{index simulation, "electronic life"}} + +To make life in our world +more interesting, we will add the concepts of ((food)) and +((reproduction)). Each living thing in the world gets a new property, +`energy`, which is reduced by performing actions and increased by +eating things. When the critter has enough ((energy)), it can +reproduce, generating a new critter of the same kind. To keep things +simple, the critters in our world reproduce asexually, all by +themselves. + +{{index energy, entropy}} + +If critters only move around and eat one +another, the world will soon succumb to the law of increasing entropy, +run out of energy, and become a lifeless wasteland. To prevent this +from happening (too quickly, at least), we add ((plant))s to the +world. Plants do not move. They just use ((photosynthesis)) to grow +(that is, increase their energy) and reproduce. + +{{index "World type"}} + +To make this work, we'll need a world with a different +`letAct` method. We could just replace the method of the `World` +prototype, but I've become very attached to our simulation with the +wall-following critters and would hate to break that old world. + +{{index "actionTypes object", "LifelikeWorld type"}} + +One solution is to use +((inheritance)). We create a new ((constructor)), `LifelikeWorld`, +whose prototype is based on the `World` prototype but which overrides +the `letAct` method. The new `letAct` method delegates the work of +actually performing an action to various functions stored in the +`actionTypes` object. 
+ +// include_code + +``` +function LifelikeWorld(map, legend) { + World.call(this, map, legend); +} +LifelikeWorld.prototype = Object.create(World.prototype); + +var actionTypes = Object.create(null); + +LifelikeWorld.prototype.letAct = function(critter, vector) { + var action = critter.act(new View(this, vector)); + var handled = action && + action.type in actionTypes && + actionTypes[action.type].call(this, critter, + vector, action); + if (!handled) { + critter.energy -= 0.2; + if (critter.energy <= 0) + this.grid.set(vector, null); + } +}; +``` + +{{index "electronic life", [function, "as value"], "call method", this}} + +The new `letAct` method first checks whether an +action was returned at all, then whether a handler function for this +type of action exists, and finally whether that handler returned +true, indicating that it successfully handled the action. Note the use +of `call` to give the handler access to the world, through its `this` +binding. + +If the action didn't work for whatever reason, the default action is +for the creature to simply wait. It loses one-fifth point of ((energy)), +and if its energy level drops to zero or below, the creature dies and +is removed from the grid. + +## Action handlers + +{{index photosynthesis}} + +The simplest action a creature can perform is +`"grow"`, used by ((plant))s. When an action object like `{type: +"grow"}` is returned, the following handler method will be called: + +// include_code + +``` +actionTypes.grow = function(critter) { + critter.energy += 0.5; + return true; +}; +``` + +Growing always succeeds and adds half a point to the plant's +((energy)) level. + +Moving is more involved. 
+ +// include_code + +``` +actionTypes.move = function(critter, vector, action) { + var dest = this.checkDestination(action, vector); + if (dest == null || + critter.energy <= 1 || + this.grid.get(dest) != null) + return false; + critter.energy -= 1; + this.grid.set(vector, null); + this.grid.set(dest, critter); + return true; +}; +``` + +{{index validation}} + +This action first checks, using the `checkDestination` +method defined [earlier](07_elife.html#checkDestination), whether +the action provides a valid destination. If not, or if the +destination isn't empty, or if the critter lacks the required +((energy)), `move` returns false to indicate no action was taken. +Otherwise, it moves the critter and subtracts the energy cost. + +{{index food}} + +In addition to moving, critters can eat. + +// include_code + +``` +actionTypes.eat = function(critter, vector, action) { + var dest = this.checkDestination(action, vector); + var atDest = dest != null && this.grid.get(dest); + if (!atDest || atDest.energy == null) + return false; + critter.energy += atDest.energy; + this.grid.set(dest, null); + return true; +}; +``` + +{{index validation}} + +Eating another ((critter)) also involves providing a +valid destination square. This time, the destination must not be +empty and must contain something with ((energy)), like a critter (but +not a wall—walls are not edible). If so, the energy from the eaten is +transferred to the eater, and the victim is removed from the grid. + +{{index reproduction}} + +And finally, we allow our critters to reproduce. 
+ +// include_code + +``` +actionTypes.reproduce = function(critter, vector, action) { + var baby = elementFromChar(this.legend, + critter.originChar); + var dest = this.checkDestination(action, vector); + if (dest == null || + critter.energy <= 2 * baby.energy || + this.grid.get(dest) != null) + return false; + critter.energy -= 2 * baby.energy; + this.grid.set(dest, baby); + return true; +}; +``` + +{{index "electronic life"}} + +Reproducing costs twice the ((energy)) +level of the newborn critter. So we first create a (hypothetical) baby +using `elementFromChar` on the critter's own origin character. Once we +have a baby, we can find its energy level and test whether the parent +has enough energy to successfully bring it into the world. We also +require a valid (and empty) destination. + +{{index reproduction}} + +If everything is okay, the baby is put onto the grid +(it is now no longer hypothetical), and the energy is spent. + +## Populating the new world + +{{index "Plant type", "electronic life"}} + +We now have a +((framework)) to simulate these more lifelike creatures. We could put +the critters from the old world into it, but they would just die +since they don't have an ((energy)) property. So let's make new ones. +First we'll write a ((plant)), which is a rather simple life-form. + +// include_code + +``` +function Plant() { + this.energy = 3 + Math.random() * 4; +} +Plant.prototype.act = function(view) { + if (this.energy > 15) { + var space = view.find(" "); + if (space) + return {type: "reproduce", direction: space}; + } + if (this.energy < 20) + return {type: "grow"}; +}; +``` + +{{index reproduction, photosynthesis, "random number", "Math.random function"}} + +Plants start with an energy level +between 3 and 7, randomized so that they don't all reproduce in the +same turn. When a plant reaches 15 energy points and there is empty +space nearby, it reproduces into that empty space. 
If a plant can't +reproduce, it simply grows until it reaches energy level 20. + +{{index critter, "PlantEater type", herbivore, "food chain"}} + +We +now define a plant eater. + +// include_code + +``` +function PlantEater() { + this.energy = 20; +} +PlantEater.prototype.act = function(view) { + var space = view.find(" "); + if (this.energy > 60 && space) + return {type: "reproduce", direction: space}; + var plant = view.find("*"); + if (plant) + return {type: "eat", direction: plant}; + if (space) + return {type: "move", direction: space}; +}; +``` + +We'll use the `*` character for ((plant))s, so that's what this +creature will look for when it searches for ((food)). + +## Bringing it to life + +{{index "electronic life"}} + +And that gives us enough elements to try +our new world. Imagine the following map as a grassy valley with a herd of +((herbivore))s in it, some boulders, and lush ((plant)) life +everywhere. + +// include_code + +``` +var valley = new LifelikeWorld( + ["############################", + "##### ######", + "## *** **##", + "# *##** ** O *##", + "# *** O ##** *#", + "# O ##*** #", + "# ##** #", + "# O #* #", + "#* #** O #", + "#*** ##** O **#", + "##**** ###*** *###", + "############################"], + {"#": Wall, + "O": PlantEater, + "*": Plant} +); +``` + +{{index animation, simulation}} + +Let's see what happens if we run this. +(!book These snapshots illustrate a typical run of this world.!) 
+ +{{if interactive + +{{startCode}} +{{test no}} + +``` +animateWorld(valley); +``` + +if}} + +{{if book + +```null +############################ ############################ +##### ###### ##### ** ###### +## *** O *## ## ** * O ## +# *##* ** *## # **## ## +# ** ##* *# # ** O ##O # +# ##* # # *O * * ## # +# ## O # # *** ## O # +# #* O # #** #*** # +#* #** O # #** O #**** # +#* O O ##* **# #*** ##*** O # +##* ###* ### ##** ###** O ### +############################ ############################ + +############################ ############################ +#####O O ###### ##### O ###### +## ## ## ## +# ##O ## # ## O ## +# O O *## # # ## # +# O O O **## O # # ## # +# **## O # # O ## * # +# # *** * # # # O # +# # O***** O # # O # O # +# ##****** # # ## O O # +## ###****** ### ## ### O ### +############################ ############################ + +############################ ############################ +##### ###### ##### ###### +## ## ## ** * ## +# ## ## # ## ***** ## +# ## # # ##**** # +# ##* * # # ##***** # +# O ## * # # ##****** # +# # # # # ** ** # +# # # # # # +# ## # # ## # +## ### ### ## ### ### +############################ ############################ +``` + +if}} + +{{index stability, reproduction, extinction, starvation}} + +Most +of the time, the plants multiply and expand quite quickly, but then +the abundance of ((food)) causes a population explosion of the +((herbivore))s, who proceed to wipe out all or nearly all of the +((plant))s, resulting in a mass starvation of the critters. Sometimes, +the ((ecosystem)) recovers and another cycle starts. At other times, +one of the species dies out completely. If it's the herbivores, the +whole space will fill with plants. If it's the plants, the remaining +critters starve, and the valley becomes a desolate wasteland. Ah, the +cruelty of nature. 
+ +## Exercises + +### Artificial stupidity + +{{index "artificial stupidity (exercise)", "artificial intelligence", extinction}} + +Having the inhabitants of our world go +extinct after a few minutes is kind of depressing. To deal with this, +we could try to create a smarter plant eater. + +{{index pathfinding, reproduction, food}} + +There are several obvious +problems with our herbivores. First, they are terribly greedy, +stuffing themselves with every plant they see until they have wiped +out the local plant life. Second, their randomized movement (recall +that the `view.find` method returns a random direction when multiple +directions match) causes them to stumble around ineffectively and +starve if there don't happen to be any plants nearby. And finally, +they breed very fast, which makes the cycles between abundance and +famine quite intense. + +Write a new critter type that tries to address one or more of these +points and substitute it for the old `PlantEater` type in the valley +world. See how it fares. Tweak it some more if necessary. + +{{if interactive + +{{test no}} + +``` +// Your code here +function SmartPlantEater() {} + +animateWorld(new LifelikeWorld( + ["############################", + "##### ######", + "## *** **##", + "# *##** ** O *##", + "# *** O ##** *#", + "# O ##*** #", + "# ##** #", + "# O #* #", + "#* #** O #", + "#*** ##** O **#", + "##**** ###*** *###", + "############################"], + {"#": Wall, + "O": SmartPlantEater, + "*": Plant} +)); +``` + +if}} + +{{hint + +{{index "artificial stupidity (exercise)", "artificial intelligence", behavior, state}} + +The greediness problem can be +attacked in several ways. The critters could stop eating when they +reach a certain ((energy)) level. Or they could eat only every N turns (by +keeping a counter of the turns since their last meal in a property on +the creature object). 
Or, to make sure plants never go entirely +extinct, the animals could refuse to eat a ((plant)) unless they see +at least one other plant nearby (using the `findAll` method on the +view). A combination of these, or some entirely different strategy, +might also work. + +{{index pathfinding, "wall following"}} + +Making the critters move more +effectively could be done by stealing one of the movement strategies +from the critters in our old, energyless world. Both the bouncing +behavior and the wall-following behavior showed a much wider range of +movement than completely random staggering. + +{{index reproduction, stability}} + +Making creatures breed more slowly is +trivial. Just increase the minimum energy level at which they +reproduce. Of course, making the ecosystem more stable also makes it +more boring. If you have a handful of fat, immobile critters forever +munching on a sea of plants and never reproducing, that makes for a +very stable ecosystem. But no one wants to watch that. + +hint}} + +### Predators + +{{index "predators (exercise)", carnivore, "food chain"}} + +Any serious +((ecosystem)) has a food chain longer than a single link. Write +another ((critter)) that survives by eating the ((herbivore)) critter. +You'll notice that ((stability)) is even harder to achieve now that there +are cycles at multiple levels. Try to find a strategy to make the +ecosystem run smoothly for at least a little while. + +{{index "Tiger type"}} + +One thing that will help is to make the world bigger. +This way, local population booms or busts are less likely to wipe out +a species entirely, and there is space for the relatively large prey +population needed to sustain a small predator population. 
+ +{{if interactive + +{{test no}} + +``` +// Your code here +function Tiger() {} + +animateWorld(new LifelikeWorld( + ["####################################################", + "# #### **** ###", + "# * @ ## ######## OO ##", + "# * ## O O **** *#", + "# ##* ########## *#", + "# ##*** * **** **#", + "#* ** # * *** ######### **#", + "#* ** # * # * **#", + "# ## # O # *** ######", + "#* @ # # * O # #", + "#* # ###### ** #", + "### **** *** ** #", + "# O @ O #", + "# * ## ## ## ## ### * #", + "# ** # * ##### O #", + "## ** O O # # *** *** ### ** #", + "### # ***** ****#", + "####################################################"], + {"#": Wall, + "@": Tiger, + "O": SmartPlantEater, // from previous exercise + "*": Plant} +)); +``` + +if}} + +{{hint + +{{index "predators (exercise)", reproduction, starvation}} + +Many of +the same tricks that worked for the previous exercise also apply here. +Making the predators big (lots of energy) and having them reproduce +slowly is recommended. That'll make them less vulnerable to periods of +starvation when the herbivores are scarce. + +Beyond staying alive, keeping its ((food)) stock alive is a +predator's main objective. Find some way to make predators hunt +more aggressively when there are a lot of ((herbivore))s and hunt more +slowly (or not at all) when prey is rare. Since plant eaters move +around, the simple trick of eating one only when others are nearby is +unlikely to work—that'll happen so rarely that your predator will +starve. But you could keep track of observations in previous turns, in +some ((data structure)) kept on the predator objects, and have it base +its ((behavior)) on what it has seen recently. 
+ +hint}} + + diff --git a/08_error.md b/08_error.md new file mode 100644 index 000000000..db2b7380a --- /dev/null +++ b/08_error.md @@ -0,0 +1,952 @@ +{{meta {chap_num: 8, prev_link: 07_elife, next_link: 09_regexp, load_files: ["code/chapter/08_error.js"]}}} + +# Bugs and Error Handling + +{{quote {author: "Brian Kernighan and P.J. Plauger", title: "The Elements of Programming Style", chapter: true} + +Debugging is +twice as hard as writing the code in the first place. Therefore, if +you write the code as cleverly as possible, you are, by definition, +not smart enough to debug it. + +quote}} + +{{if interactive + +{{quote {author: "Master Yuan-Ma", title: "The Book of Programming", chapter: true} + +Yuan-Ma had written a small program that used many global variables +and shoddy shortcuts. Reading it, a student asked, ‘You warned us +against these techniques, yet I find them in your program. How can +this be?’ The master said, ‘There is no need to fetch a water hose +when the house is not on fire.’ + +quote}} + +if}} + +{{index "Kernighan, Brian", "Plauger, P.J.", debugging, "error handling"}} + +A program is crystallized thought. +Sometimes those thoughts are confused. Other times, mistakes are +introduced when converting thought into code. Either way, the result +is a flawed program. + +{{index input, output}} + +Flaws in a program are usually called ((bug))s. +Bugs can be programmer errors or problems in other systems that the +program interacts with. Some bugs are immediately apparent, while +others are subtle and might remain hidden in a system for years. + +Often, problems surface only when a program encounters a situation +that the programmer didn't originally consider. Sometimes such +situations are unavoidable. When the user is asked to input their age +and types _orange_, this puts our program in a difficult position. The +situation has to be anticipated and handled somehow.
+ +## Programmer mistakes + +{{index parsing, analysis}} + +When it comes to programmer mistakes, our +aim is simple. We want to find them and fix them. Such mistakes can +range from simple ((typo))s that cause the computer to complain as +soon as it lays eyes on our program to subtle mistakes in our +understanding of the way the program operates, causing incorrect +outcomes only in specific situations. Bugs of the latter type can +take weeks to diagnose. + +{{index "programming language", type, "static typing", "dynamic typing", "run-time error", error}} + +The degree to which languages +help you find such mistakes varies. Unsurprisingly, JavaScript is at +the “hardly helps at all” end of that scale. Some languages want to +know the types of all your variables and expressions before even +running a program and will tell you right away when a type is used in +an inconsistent way. JavaScript considers types only when actually +running the program, and even then, it allows you to do some clearly +nonsensical things without complaint, such as `x = true * "monkey"`. + +{{index syntax}} + +There are some things that JavaScript does complain about, +though. Writing a program that is not syntactically valid will +immediately trigger an error. Other things, such as calling something +that's not a function or looking up a ((property)) on an ((undefined)) +value, will cause an error to be reported when the program is running +and encounters the nonsensical action. + +{{index NaN, error}} + +But often, your nonsense computation will simply +produce a `NaN` (not a number) or undefined value. And the program +happily continues, convinced that it's doing something meaningful. The +mistake will manifest itself only later, after the bogus value has +traveled through several functions. It might not trigger an error at +all but silently cause the program's output to be wrong. Finding the +source of such problems can be difficult. 
+ +{{index debugging}} + +The process of finding mistakes—bugs—in programs is +called _debugging_. + +## Strict mode + +{{index "strict mode", syntax, function}} + +{{indexsee "use strict", "strict mode"}} + +JavaScript can be made a +_little_ more strict by enabling _strict mode_. This is done by +putting the string `"use strict"` at the top of a file or a function +body. Here's an example: + +{{test error "ReferenceError: counter is not defined"}} + +``` +function canYouSpotTheProblem() { + "use strict"; + for (counter = 0; counter < 10; counter++) + console.log("Happy happy"); +} + +canYouSpotTheProblem(); +// → ReferenceError: counter is not defined +``` + +{{index "var keyword", [variable, global]}} + +Normally, when you forget to put +`var` in front of your variable, as with `counter` in the example, +JavaScript quietly creates a global variable and uses that. In strict +mode, however, an ((error)) is reported instead. This is very helpful. +It should be noted, though, that this doesn't work when the variable +in question already exists as a global variable, but only when +assigning to it would have created it. + +{{index this, "global object", undefined, "strict mode"}} + +Another +change in strict mode is that the `this` binding holds the value +`undefined` in functions that are not called as ((method))s. When +making such a call outside of strict mode, `this` refers to the global +scope object. So if you accidentally call a method or constructor +incorrectly in strict mode, JavaScript will produce an error as soon +as it tries to read something from `this`, rather than happily working +with the global object, creating and reading global variables. 
+ +For example, consider the following code, which calls a +((constructor)) without the `new` keyword so that its `this` will +_not_ refer to a newly constructed object: + +``` +function Person(name) { this.name = name; } +var ferdinand = Person("Ferdinand"); // oops +console.log(name); +// → Ferdinand +``` + +{{index error}} + +So the bogus call to `Person` succeeded but returned an +undefined value and created the global variable `name`. In strict +mode, the result is different. + +{{test error "TypeError: Cannot set property 'name' of undefined"}} + +``` +"use strict"; +function Person(name) { this.name = name; } +// Oops, forgot 'new' +var ferdinand = Person("Ferdinand"); +// → TypeError: Cannot set property 'name' of undefined +``` + +We are immediately told that something is wrong. This is helpful. + +{{index parameter, [variable, naming], "with statement"}} + +Strict mode +does a few more things. It disallows giving a function multiple +parameters with the same name and removes certain problematic +language features entirely (such as the `with` statement, which is so +misguided it is not further discussed in this book). + +{{index debugging}} + +In short, putting a `"use strict"` at the top of your +program rarely hurts and might help you spot a problem. + +## Testing + +{{index "test suite", "run-time error"}} + +If the language is not going to do +much to help us find mistakes, we'll have to find them the hard way: +by running the program and seeing whether it does the right thing. + +Doing this by hand, again and again, is a sure way to drive yourself +insane. Fortunately, it is often possible to write a second program +that automates testing your actual program. + +{{index "Vector type"}} + +As an example, we once again use the `Vector` type. 
+ +// include_code + +``` +function Vector(x, y) { + this.x = x; + this.y = y; +} +Vector.prototype.plus = function(other) { + return new Vector(this.x + other.x, this.y + other.y); +}; +``` + +We will write a program to check that our implementation of `Vector` +works as intended. Then, every time we change the implementation, we +follow up by running the test program so that we can be reasonably +confident that we didn't break anything. When we add extra +functionality (for example, a new method) to the `Vector` type, we also +add tests for the new feature. + +``` +function testVector() { + var p1 = new Vector(10, 20); + var p2 = new Vector(-10, 5); + var p3 = p1.plus(p2); + + if (p1.x !== 10) return "fail: x property"; + if (p1.y !== 20) return "fail: y property"; + if (p2.x !== -10) return "fail: negative x property"; + if (p3.x !== 0) return "fail: x from plus"; + if (p3.y !== 25) return "fail: y from plus"; + return "everything ok"; +} +console.log(testVector()); +// → everything ok +``` + +{{index "test suite", "testing framework", "domain-specific language"}} + +Writing tests like this tends to produce rather repetitive, +awkward code. Fortunately, there exist pieces of software that help +you build and run collections of tests (_test suites_) by providing a +language (in the form of functions and methods) suited to expressing +tests and by outputting informative messages when a test fails. These +are called _testing frameworks_. + +## Debugging + +{{index debugging}} + +Once you notice that there is something wrong with your +program because it misbehaves or produces errors, the next step is to +figure out _what_ the problem is. + +Sometimes it is obvious. The ((error)) message will point at a +specific line of your program, and if you look at the error +description and that line of code, you can often see the problem. + +{{index "run-time error"}} + +But not always.
Sometimes the line that triggered +the problem is simply the first place where a bogus value produced +elsewhere gets used in an invalid way. And sometimes there is no error +message at all—just an invalid result. If you have been solving the +((exercises)) in the earlier chapters, you will probably have already +experienced such situations. + +{{index "decimal number", "binary number"}} + +The following example program +tries to convert a whole number to a string in any base (decimal, +binary, and so on) by repeatedly picking out the last ((digit)) and then +dividing the number to get rid of this digit. But the insane output +that it currently produces suggests that it has a ((bug)). + +``` +function numberToString(n, base) { + var result = "", sign = ""; + if (n < 0) { + sign = "-"; + n = -n; + } + do { + result = String(n % base) + result; + n /= base; + } while (n > 0); + return sign + result; +} +console.log(numberToString(13, 10)); +// → 1.5e-3231.3e-3221.3e-3211.3e-3201.3e-3191.3e-3181.3… +``` + +{{index analysis}} + +Even if you see the problem already, pretend for a +moment that you don't. We know that our program is malfunctioning, and +we want to find out why. + +{{index "trial and error"}} + +This is where you must resist the urge to start +making random changes to the code. Instead, _think_. Analyze what is +happening and come up with a ((theory)) of why it might be happening. +Then, make additional observations to test this theory—or, if you +don't yet have a theory, make additional observations that might help +you come up with one. + +{{index "console.log", output, debugging, logging}} + +Putting a few +strategic `console.log` calls into the program is a good way to get +additional information about what the program is doing. In this case, +we want `n` to take the values `13`, `1`, and then `0`. Let's write +out its value at the start of the loop. + +```null +13 +1.3 +0.13 +0.013 +… +1.5e-323 +``` + +{{index rounding}} + +_Right_. 
Dividing 13 by 10 does not produce a whole +number. Instead of `n /= base`, what we actually want is `n = +Math.floor(n / base)` so that the number is properly “shifted” to the +right. + +{{index "JavaScript console", breakpoint, "debugger statement"}} + +An +alternative to using `console.log` is to use the _debugger_ +capabilities of your browser. Modern browsers come with the ability to +set a _breakpoint_ on a specific line of your code. This will cause +the execution of the program to pause every time the line with the +breakpoint is reached and allow you to inspect the values of +variables at that point. I won't go into details here since debuggers +differ from browser to browser, but look in your browser's developer +tools and search the Web for more information. Another way to set a +breakpoint is to include a `debugger` statement (consisting of simply +that keyword) in your program. If the ((developer tools)) of your +browser are active, the program will pause whenever it reaches that +statement, and you will be able to inspect its state. + +## Error propagation + +{{index input, output, "run-time error", error, validation}} + +Not all problems can be prevented +by the programmer, unfortunately. If your program communicates with +the outside world in any way, there is a chance that the input it gets +will be invalid or that other systems that it tries to talk to are +broken or unreachable. + +{{index "error recovery"}} + +Simple programs, or programs that run only under +your supervision, can afford to just give up when such a problem +occurs. You'll look into the problem and try again. “Real” +applications, on the other hand, are expected to not simply crash. +Sometimes the right thing to do is take the bad input in stride and +continue running. In other cases, it is better to report to the user +what went wrong and then give up. But in either situation, the program +has to actively do something in response to the problem. 
+ +{{index "promptNumber function", validation}} + +Say you have a function +`promptNumber` that asks the user for a number and returns it. +What should it return if the user inputs _orange_? + +{{index null, undefined, "return value", "special return value"}} + +One option is to make it return a special value. Common +choices for such values are `null` and `undefined`. + +{{test no}} + +``` +function promptNumber(question) { + var result = Number(prompt(question, "")); + if (isNaN(result)) return null; + else return result; +} + +console.log(promptNumber("How many trees do you see?")); +``` + +This is a sound strategy. Now any code that calls `promptNumber` must +check whether an actual number was read and, failing that, must +somehow recover—maybe by asking again or by filling in a default +value. Or it could again return a special value to _its_ caller to +indicate that it failed to do what it was asked. + +{{index "error handling"}} + +In many situations, mostly when ((error))s are +common and the caller should be explicitly taking them into account, +returning a special value is a perfectly fine way to indicate an +error. It does, however, have its downsides. First, what if the +function can already return every possible kind of value? For such a +function, it is hard to find a special value that can be distinguished +from a valid result. + +{{index "special return value", readability}} + +The second issue with +returning special values is that it can lead to some very cluttered +code. If a piece of code calls `promptNumber` 10 times, it has to +check 10 times whether `null` was returned. And if its response to +finding `null` is to simply return `null` itself, the caller will in +turn have to check for it, and so on. + +## Exceptions + +{{index "error handling"}} + +When a function cannot proceed normally, what we +would _like_ to do is just stop what we are doing and immediately jump +back to a place that knows how to handle the problem.
This is what +_((exception handling))_ does. + +{{index "control flow", "raising (exception)", "throw keyword", "call stack"}} + +Exceptions are a mechanism that makes it possible for code that +runs into a problem to _raise_ (or _throw_) an exception, which is +simply a value. Raising an exception somewhat resembles a +super-charged return from a function: it jumps out of not just the +current function but also out of its callers, all the way down to the +first call that started the current execution. This is called +_((unwinding the stack))_. You may remember the stack of function +calls that was mentioned in [Chapter 3](03_functions.html#stack). +An exception zooms down this stack, throwing away all the call +contexts it encounters. + +{{index "error handling", syntax, "catch keyword"}} + +If exceptions +always zoomed right down to the bottom of the stack, they would not be +of much use. They would just provide a novel way to blow up your +program. Their power lies in the fact that you can set “obstacles” +along the stack to _catch_ the exception as it is zooming down. Then +you can do something with it, after which the program continues +running at the point where the exception was caught. + +Here's an example: + +{{id look}} +``` +function promptDirection(question) { + var result = prompt(question, ""); + if (result.toLowerCase() == "left") return "L"; + if (result.toLowerCase() == "right") return "R"; + throw new Error("Invalid direction: " + result); +} + +function look() { + if (promptDirection("Which way?") == "L") + return "a house"; + else + return "two angry bears"; +} + +try { + console.log("You see", look()); +} catch (error) { + console.log("Something went wrong: " + error); +} +``` + +{{index "exception handling", block, "throw keyword", "try keyword", "catch keyword"}} + +The `throw` keyword is used to raise an +exception. Catching one is done by wrapping a piece of code in a `try` +block, followed by the keyword `catch`.
When the code in the `try` +block causes an exception to be raised, the `catch` block is +evaluated. The variable name (in parentheses) after `catch` will be +bound to the exception value. After the `catch` block finishes—or if +the `try` block finishes without problems—control proceeds beneath the +entire `try/catch` statement. + +{{index debugging, "call stack", "Error type", "stack trace"}} + +In this case, we used the `Error` ((constructor)) to create +our exception value. This is a ((standard)) JavaScript constructor +that creates an object with a `message` property. In modern JavaScript +environments, instances of this constructor also gather information +about the call stack that existed when the exception was created, a +so-called _stack trace_. This information is stored in the `stack` +property and can be helpful when trying to debug a problem: it +tells us the precise function where the problem occurred and which +other functions led up to the call that failed. + +{{index "exception handling"}} + +Note that the function `look` completely +ignores the possibility that `promptDirection` might go wrong. This is +the big advantage of exceptions—error-handling code is necessary only +at the point where the error occurs and at the point where it is +handled. The functions in between can forget all about it. + +Well, almost... + +## Cleaning up after exceptions + +{{index "exception handling", "cleaning up", "withContext function", "dynamic scope"}} + +Consider the following situation: a +function, `withContext`, wants to make sure that, during its +execution, the top-level variable `context` holds a specific context +value. After it finishes, it restores this variable to its old value. + +// include_code + +``` +var context = null; + +function withContext(newContext, body) { + var oldContext = context; + context = newContext; + var result = body(); + context = oldContext; + return result; +} +``` + +What if `body` raises an exception? 
In that case, the call to +`withContext` will be thrown off the stack by the exception, and +`context` will never be set back to its old value. + +{{index block, "try keyword", "finally keyword"}} + +There is one more +feature that `try` statements have. They may be followed by a +`finally` block either instead of or in addition to a `catch` +block. A `finally` block means “No matter _what_ happens, run this +code after trying to run the code in the `try` block”. If a function +has to clean something up, the cleanup code should usually be put into +a `finally` block. + +// include_code + +``` +function withContext(newContext, body) { + var oldContext = context; + context = newContext; + try { + return body(); + } finally { + context = oldContext; + } +} +``` + +{{index "withContext function"}} + +Note that we no longer have to store the +result of `body` (which we want to return) in a variable. Even if we +return directly from the `try` block, the `finally` block will be run. +Now we can do this and be safe: + +{{test no}} + +``` +try { + withContext(5, function() { + if (context < 10) + throw new Error("Not enough context!"); + }); +} catch (e) { + console.log("Ignoring: " + e); +} +// → Ignoring: Error: Not enough context! + +console.log(context); +// → null +``` + +Even though the function called from `withContext` exploded, +`withContext` itself still properly cleaned up the `context` variable. + +## Selective catching + +{{index "uncaught exception", "exception handling", "JavaScript console", "developer tools", "call stack", error}} + +When an +exception makes it all the way to the bottom of the stack without +being caught, it gets handled by the environment. What this means +differs between environments. In browsers, a description of the error +typically gets written to the JavaScript console (reachable through +the browser's Tools or Developer menu). 
+ +{{index crash, "error handling"}} + +For programmer mistakes or problems +that the program cannot possibly handle, just letting the error go +through is often okay. An unhandled exception is a reasonable way to +signal a broken program, and the JavaScript console will, on modern +browsers, provide you with some information about which function calls +were on the stack when the problem occurred. + +{{index "user interface"}} + +For problems that are _expected_ to happen during +routine use, crashing with an unhandled exception is not a very +friendly response. + +{{index syntax, [function, application], "exception handling", "Error type"}} + +Invalid uses of the language, such as referencing a nonexistent +((variable)), looking up a property on `null`, or calling something +that's not a function, will also result in exceptions being raised. +Such exceptions can be caught just like your own exceptions. + +{{index "catch keyword"}} + +When a `catch` body is entered, all we know is that +_something_ in our `try` body caused an exception. But we don't know +_what_, or _which_ exception it caused. + +{{index "exception handling"}} + +JavaScript (in a rather glaring omission) +doesn't provide direct support for selectively catching exceptions: +either you catch them all or you don't catch any. This makes it very +easy to _assume_ that the exception you get is the one you were +thinking about when you wrote the `catch` block. + +{{index "promptDirection function"}} + +But it might not be. Some other +((assumption)) might be violated, or you might have introduced a bug +somewhere that is causing an exception. Here is an example, which +_attempts_ to keep on calling `promptDirection` until it gets a valid +answer: + +{{test no}} + +``` +for (;;) { + try { + var dir = promtDirection("Where?"); // ← typo! + console.log("You chose ", dir); + break; + } catch (e) { + console.log("Not a valid direction. 
Try again."); + } +} +``` + +{{index "infinite loop", "for loop", "catch keyword", debugging}} + +The +`for (;;)` construct is a way to intentionally create a loop that +doesn't terminate on its own. We break out of the loop only when a +valid direction is given. _But_ we misspelled `promptDirection`, +which will result in an “undefined variable” error. Because the +`catch` block completely ignores its exception value (`e`), assuming +it knows what the problem is, it wrongly treats the variable error as +indicating bad input. Not only does this cause an infinite loop, but +it also “buries” the useful error message about the misspelled +variable. + +As a general rule, don't blanket-catch exceptions unless it is for the +purpose of “routing” them somewhere—for example, over the network to +tell another system that our program crashed. And even then, think +carefully about how you might be hiding information. + +{{index "exception handling"}} + +So we want to catch a _specific_ kind of +exception. We can do this by checking in the `catch` block whether the +exception we got is the one we are interested in and by rethrowing it +otherwise. But how do we recognize an exception? + +Of course, we could match its `message` property against the ((error)) +message we happen to expect. But that's a shaky way to write code—we'd +be using information that's intended for human consumption (the +message) to make a programmatic decision. As soon as someone changes +(or translates) the message, the code will stop working. + +{{index "Error type", "instanceof operator"}} + +Rather, let's define a new +type of error and use `instanceof` to identify it. 
+ +// include_code + +``` +function InputError(message) { + this.message = message; + this.stack = (new Error()).stack; +} +InputError.prototype = Object.create(Error.prototype); +InputError.prototype.name = "InputError"; +``` + +{{index "throw keyword", inheritance}} + +The prototype is made to derive +from `Error.prototype` so that `instanceof Error` will also return +true for `InputError` objects. It's also given a `name` property +since the standard error types (`Error`, `SyntaxError`, +`ReferenceError`, and so on) also have such a property. + +{{index "call stack"}} + +The assignment to the `stack` property tries to give +this object a somewhat useful ((stack trace)), on platforms that +support it, by creating a regular error object and then using that +object's `stack` property as its own. + +{{index "promptDirection function"}} + +Now `promptDirection` can throw such an +error. + +// include_code + +``` +function promptDirection(question) { + var result = prompt(question, ""); + if (result.toLowerCase() == "left") return "L"; + if (result.toLowerCase() == "right") return "R"; + throw new InputError("Invalid direction: " + result); +} +``` + +{{index "exception handling"}} + +And the loop can catch it more carefully. + +{{test no}} + +``` +for (;;) { + try { + var dir = promptDirection("Where?"); + console.log("You chose ", dir); + break; + } catch (e) { + if (e instanceof InputError) + console.log("Not a valid direction. Try again."); + else + throw e; + } +} +``` + +{{index debugging}} + +This will catch only instances of `InputError` and let +unrelated exceptions through. If you reintroduce the typo, the +undefined variable error will be properly reported. + +## Assertions + +{{index "assert function", assertion, debugging}} + +_Assertions_ are a +tool to do basic sanity checking for programmer errors. 
Consider this +helper function, `assert`: + +``` +function AssertionFailed(message) { + this.message = message; +} +AssertionFailed.prototype = Object.create(Error.prototype); + +function assert(test, message) { + if (!test) + throw new AssertionFailed(message); +} + +function lastElement(array) { + assert(array.length > 0, "empty array in lastElement"); + return array[array.length - 1]; +} +``` + +{{index validation, "run-time error", crash, assumption, array}} + +This provides a +compact way to enforce expectations, helpfully blowing up the program +if the stated condition does not hold. For instance, the `lastElement` +function, which fetches the last element from an array, would return +`undefined` on empty arrays if the assertion was omitted. Fetching the +last element from an empty array does not make much sense, so it is +almost certainly a programmer error to do so. + +{{index assertion, debugging}} + +Assertions are a way to make sure +mistakes cause failures at the point of the mistake, rather than +silently producing nonsense values that may go on to cause trouble in +an unrelated part of the system. + +## Summary + +Mistakes and bad input are facts of life. Bugs in programs need to be +found and fixed. They can become easier to notice by having automated +test suites and adding assertions to your programs. + +Problems caused by factors outside the program's control should +usually be handled gracefully. Sometimes, when the problem can be +handled locally, special return values are a sane way to track them. +Otherwise, exceptions are preferable. + +Throwing an exception causes the call stack to be unwound until the +next enclosing `try/catch` block or until the bottom of the stack. +The exception value will be given to the `catch` block that catches +it, which should verify that it is actually the expected kind of +exception and then do something with it. 
To deal with the +unpredictable control flow caused by exceptions, `finally` blocks can +be used to ensure a piece of code is _always_ run when a block +finishes. + +## Exercises + +### Retry + +{{index "primitiveMultiply (exercise)", "exception handling", "throw keyword"}} + +Say you have a function `primitiveMultiply` that, in 50 percent of +cases, multiplies two numbers, and in the other 50 percent, raises an +exception of type `MultiplicatorUnitFailure`. Write a function that +wraps this clunky function and just keeps trying until a call +succeeds, after which it returns the result. + +{{index "catch keyword"}} + +Make sure you handle only the exceptions you +are trying to handle. + +{{if interactive + +{{test no}} + +``` +function MultiplicatorUnitFailure() {} + +function primitiveMultiply(a, b) { + if (Math.random() < 0.5) + return a * b; + else + throw new MultiplicatorUnitFailure(); +} + +function reliableMultiply(a, b) { + // Your code here. +} + +console.log(reliableMultiply(8, 8)); +// → 64 +``` +if}} + +{{hint + +{{index "primitiveMultiply (exercise)", "try keyword", "catch keyword", "throw keyword"}} + +The call to `primitiveMultiply` should +obviously happen in a `try` block. The corresponding `catch` block +should rethrow the exception when it is not an instance of +`MultiplicatorUnitFailure` and ensure the call is retried when it is. + +To do the retrying, you can either use a loop that breaks only when a +call succeeds—as in the [`look` example](08_error.html#look) +earlier in this chapter—or use ((recursion)) and hope you don't get a +string of failures so long that it overflows the stack (which is a +pretty safe bet). 
+ +hint}} + +### The locked box + +{{index "locked box (exercise)"}} + +Consider the following (rather contrived) +object: + +// include_code + +``` +var box = { + locked: true, + unlock: function() { this.locked = false; }, + lock: function() { this.locked = true; }, + _content: [], + get content() { + if (this.locked) throw new Error("Locked!"); + return this._content; + } +}; +``` + +{{index "private property", "access control"}} + +It is a ((box)) with a +lock. Inside is an array, but you can get at it only when the box is +unlocked. Directly accessing the `_content` property is not allowed. + +{{index "finally keyword", "exception handling"}} + +Write a function called +`withBoxUnlocked` that takes a function value as argument, unlocks the +box, runs the function, and then ensures that the box is locked again +before returning, regardless of whether the argument function returned +normally or threw an exception. + +{{if interactive + +``` +function withBoxUnlocked(body) { + // Your code here. +} + +withBoxUnlocked(function() { + box.content.push("gold piece"); +}); + +try { + withBoxUnlocked(function() { + throw new Error("Pirates on the horizon! Abort!"); + }); +} catch (e) { + console.log("Error raised:", e); +} +console.log(box.locked); +// → true +``` + +For extra points, make sure that if you call `withBoxUnlocked` when +the box is already unlocked, the box stays unlocked. + +if}} + +{{hint + +{{index "locked box (exercise)", "finally keyword", "try keyword"}} + +This +exercise calls for a `finally` block, as you probably guessed. Your +function should first unlock the box and then call the argument function +from inside a `try` body. The `finally` block after it should lock the +box again. + +To make sure we don't lock the box when it wasn't already locked, +check its lock at the start of the function and unlock and lock +it only when it started out locked. 
+ +hint}} + diff --git a/09_regexp.md b/09_regexp.md new file mode 100644 index 000000000..55e74fb40 --- /dev/null +++ b/09_regexp.md @@ -0,0 +1,1443 @@ +{{meta {chap_num: 9, prev_link: 08_error, next_link: 10_modules}}} + +# Regular Expressions + +{{quote {author: "Jamie Zawinski", chapter: true} + +Some people, when confronted with a +problem, think ‘I know, I'll use regular expressions.’ Now they have +two problems. + +quote}} + +{{if interactive + +{{quote {author: "Master Yuan-Ma", title: "The Book of Programming", chapter: true} + +Yuan-Ma said, ‘When you cut against the grain of the wood, much +strength is needed. When you program against the grain of a problem, +much code is needed.’ + +quote}} + +if}} + +{{index "Zawinski, Jamie", evolution, adoption, integration}} + +Programming +((tool))s and techniques survive and spread in a chaotic, evolutionary +way. It's not always the pretty or brilliant ones that win but rather +the ones that function well enough within the right niche—for example, +by being integrated with another successful piece of technology. + +{{index "domain-specific language"}} + +In this chapter, I will discuss one such +tool, _((regular expression))s_. Regular expressions are a way to +describe ((pattern))s in string data. They form a small, separate +language that is part of JavaScript and many other languages and +tools. + +{{index [interface, design]}} + +Regular expressions are both terribly awkward +and extremely useful. Their syntax is cryptic, and the programming +((interface)) JavaScript provides for them is clumsy. But they are a +powerful ((tool)) for inspecting and processing strings. Properly +understanding regular expressions will make you a more effective +programmer. + +## Creating a regular expression + +{{index ["regular expression", creation], "RegExp constructor", "literal expression", "slash character"}} + +A regular expression is a type of +object. 
It can either be constructed with the `RegExp` constructor or +written as a literal value by enclosing the pattern in forward slash +(`/`) characters. + +``` +var re1 = new RegExp("abc"); +var re2 = /abc/; +``` + +Both of these regular expression objects represent the same +((pattern)): an _a_ character followed by a _b_ followed by a _c_. + +{{index "backslash character", "RegExp constructor"}} + +When using the +`RegExp` constructor, the pattern is written as a normal string, so +the usual rules apply for backslashes. + +{{index ["regular expression", escaping], [escaping, "in regexps"], "slash character"}} + +The second notation, where the pattern appears between +slash characters, treats backslashes somewhat differently. First, +since a forward slash ends the pattern, we need to put a backslash +before any forward slash that we want to be _part_ of the pattern. In +addition, backslashes that aren't part of special character codes +(like `\n`) will be _preserved_, rather than ignored as they are in +strings, and change the meaning of the pattern. Some characters, such +as question marks and plus signs, have special meanings in regular +expressions and must be preceded by a backslash if they are meant to +represent the character itself. + +``` +var eighteenPlus = /eighteen\+/; +``` + +Knowing precisely what characters to backslash-escape when writing +regular expressions requires you to know every character with a +special meaning. For the time being, this may not be realistic, so +when in doubt, just put a backslash before any character that is not a +letter, number, or ((whitespace)). + +## Testing for matches + +{{index matching, "test method", ["regular expression", methods]}} + +Regular +expression objects have a number of methods. The simplest one is +`test`. If you pass it a string, it will return a ((Boolean)) telling +you whether the string contains a match of the pattern in the +expression. 
+ +``` +console.log(/abc/.test("abcde")); +// → true +console.log(/abc/.test("abxde")); +// → false +``` + +{{index pattern}} + +A ((regular expression)) consisting of only nonspecial +characters simply represents that sequence of characters. If _abc_ +occurs anywhere in the string we are testing against (not just at the +start), `test` will return `true`. + +## Matching a set of characters + +{{index "regular expression", "indexOf method"}} + +Finding out whether a +string contains _abc_ could just as well be done with a call to +`indexOf`. Regular expressions allow us to go beyond that and express +more complicated ((pattern))s. + +Say we want to match any ((number)). In a regular expression, putting +a ((set)) of characters between square brackets makes that part of the +expression match any of the characters between the brackets. + +Both of the following expressions match all strings that contain a ((digit)): + +``` +console.log(/[0123456789]/.test("in 1992")); +// → true +console.log(/[0-9]/.test("in 1992")); +// → true +``` + +{{index "dash character"}} + +Within square brackets, a dash (`-`) between two +characters can be used to indicate a ((range)) of characters, where +the ordering is determined by the character's ((Unicode)) number. +Characters 0 to 9 sit right next to each other in this ordering +(codes 48 to 57), so `[0-9]` covers all of them and matches any +((digit)). + +{{index whitespace, "alphanumeric character", "period character"}} + +There are a number of common character groups that have +their own built-in shortcuts. Digits are one of them: `\d` means the +same thing as `[0-9]`. 
+ +{{index "newline character"}} + +[cols="1,5"] +|==== +|`\d` |Any ((digit)) character +|`\w` |An alphanumeric character (“((word character))”) +|`\s` |Any ((whitespace)) character (space, tab, newline, and similar) +|`\D` |A character that is _not_ a digit +|`\W` |A nonalphanumeric character +|`\S` |A nonwhitespace character +|`.` |Any character except for newline +|==== + +So you could match a ((date)) and ((time)) format like 30-01-2003 +15:20 with the following expression: + +``` +var dateTime = /\d\d-\d\d-\d\d\d\d \d\d:\d\d/; +console.log(dateTime.test("30-01-2003 15:20")); +// → true +console.log(dateTime.test("30-jan-2003 15:20")); +// → false +``` + +{{index "backslash character"}} + +That looks completely awful, doesn't it? It has way too +many backslashes, producing background noise that makes it hard to +spot the actual ((pattern)) expressed. We'll see a slightly improved +version of this expression +[later](09_regexp.html#date_regexp_counted). + +{{index [escaping, "in regexps"], "regular expression", set}} + +These +backslash codes can also be used inside ((square brackets)). For +example, `[\d.]` means any digit or a period character. But note that +the period itself, when used between square brackets, loses its +special meaning. The same goes for other special characters, such as +`+`. + +{{index "square brackets", inversion, "caret character"}} + +To _invert_ a +set of characters—that is, to express that you want to match any +character _except_ the ones in the set—you can write a caret (`^`) +character after the opening bracket. + +``` +var notBinary = /[^01]/; +console.log(notBinary.test("1100100010100110")); +// → false +console.log(notBinary.test("1100100010200110")); +// → true +``` + +## Repeating parts of a pattern + +{{index ["regular expression", repetition]}} + +We now know how to match a single digit. What +if we want to match a whole number—a ((sequence)) of one or more +((digit))s? 
+ +{{index "plus character", repetition, "+ operator"}} + +When you put a +plus sign (`+`) after something in a regular expression, it indicates +that the element may be repeated more than once. Thus, `/\d+/` matches +one or more digit characters. + +``` +console.log(/'\d+'/.test("'123'")); +// → true +console.log(/'\d+'/.test("''")); +// → false +console.log(/'\d*'/.test("'123'")); +// → true +console.log(/'\d*'/.test("''")); +// → true +``` + +{{index "* operator", asterisk}} + +The star (`*`) has a similar +meaning but also allows the pattern to match zero times. Something +with a star after it never prevents a pattern from matching—it'll just +match zero instances if it can't find any suitable text to match. + +{{index "British English", "American English", "question mark"}} + +A +question mark makes a part of a pattern “((optional))”, meaning it may +occur zero or one time. In the following example, the _u_ character +is allowed to occur, but the pattern also matches when it is missing. + +``` +var neighbor = /neighbou?r/; +console.log(neighbor.test("neighbour")); +// → true +console.log(neighbor.test("neighbor")); +// → true +``` + +{{index repetition, "curly braces"}} + +To indicate that a pattern should +occur a precise number of times, use curly braces. Putting `{4}` after +an element, for example, requires it to occur exactly four times. It +is also possible to specify a ((range)) this way: `{2,4}` means the +element must occur at least twice and at most four times. + +{{id date_regexp_counted}} +Here is another version of the ((date)) and ((time)) pattern that +allows both single- and double-((digit)) days, months, and hours. It +is also slightly more readable. + +``` +var dateTime = /\d{1,2}-\d{1,2}-\d{4} \d{1,2}:\d{2}/; +console.log(dateTime.test("30-1-2003 8:45")); +// → true +``` + +You can also specify open-ended ((range))s when using ((curly braces)) +by omitting the number after the comma. So `{5,}` means five or more +times.
+ +## Grouping subexpressions + +{{index ["regular expression", grouping], grouping}} + +To use an operator like `*` or +`+` on more than one element at a time, you can use ((parentheses)). A +part of a regular expression that is enclosed in parentheses counts +as a single element as far as the operators following it are +concerned. + +``` +var cartoonCrying = /boo+(hoo+)+/i; +console.log(cartoonCrying.test("Boohoooohoohooo")); +// → true +``` + +{{index crying}} + +The first and second `+` characters apply only to the +second _o_ in _boo_ and _hoo_, respectively. The third `+` applies to +the whole group `(hoo+)`, matching one or more sequences like that. + +{{index "case sensitivity", capitalization, ["regular expression", flags]}} + +The `i` at the end of the expression in the +previous example makes this regular expression case insensitive, allowing it to +match the uppercase _B_ in the input string, even though the pattern +is itself all lowercase. + +## Matches and groups + +{{index ["regular expression", grouping], "exec method", array}} + +The `test` method +is the absolute simplest way to match a regular expression. It +tells you only whether it matched and nothing else. Regular expressions +also have an `exec` (execute) method that will return `null` if no +match was found and return an object with information about the match +otherwise. + +``` +var match = /\d+/.exec("one two 100"); +console.log(match); +// → ["100"] +console.log(match.index); +// → 8 +``` + +{{index "index property", [string, indexing]}} + +An object returned from +`exec` has an `index` property that tells us _where_ in the string the +successful match begins. Other than that, the object looks like (and +in fact is) an array of strings, whose first element is the string +that was matched—in the previous example, this is the sequence of +((digit))s that we were looking for. + +{{index [string, methods], "match method"}} + +String values have a `match` +method that behaves similarly. 
+ +``` +console.log("one two 100".match(/\d+/)); +// → ["100"] +``` + +{{index grouping, "capture group", "exec method"}} + +When the regular +expression contains subexpressions grouped with parentheses, the text +that matched those groups will also show up in the array. The whole +match is always the first element. The next element is the part +matched by the first group (the one whose opening parenthesis comes +first in the expression), then the second group, and so on. + +``` +var quotedText = /'([^']*)'/; +console.log(quotedText.exec("she said 'hello'")); +// → ["'hello'", "hello"] +``` + +{{index "capture group"}} + +When a group does not end up being matched at all +(for example, when followed by a question mark), its position in the +output array will hold `undefined`. Similarly, when a group is matched +multiple times, only the last match ends up in the array. + +``` +console.log(/bad(ly)?/.exec("bad")); +// → ["bad", undefined] +console.log(/(\d)+/.exec("123")); +// → ["123", "3"] +``` + +{{index "exec method", ["regular expression", methods], extraction}} + +Groups can be useful for +extracting parts of a string. If we don't just want to verify whether +a string contains a ((date)) but also extract it and construct an +object that represents it, we can wrap parentheses around the digit +patterns and directly pick the date out of the result of `exec`. + +But first, a brief detour, in which we discuss the preferred way to +store date and ((time)) values in JavaScript. + +## The date type + +{{index constructor, "Date constructor"}} + +JavaScript has a standard +object type for representing ((date))s—or rather, points in ((time)). +It is called `Date`. If you simply create a date object using `new`, +you get the current date and time. + +{{test no}} + +``` +console.log(new Date()); +// → Wed Dec 04 2013 14:24:57 GMT+0100 (CET) +``` + +{{index "Date constructor"}} + +You can also create an object for a specific +time. 
+ +``` +console.log(new Date(2009, 11, 9)); +// → Wed Dec 09 2009 00:00:00 GMT+0100 (CET) +console.log(new Date(2009, 11, 9, 12, 59, 59, 999)); +// → Wed Dec 09 2009 12:59:59 GMT+0100 (CET) +``` + +{{index "zero-based counting", [interface, design]}} + +JavaScript uses a +convention where month numbers start at zero (so December is 11), yet +day numbers start at one. This is confusing and silly. Be careful. + +The last four arguments (hours, minutes, seconds, and milliseconds) +are optional and taken to be zero when not given. + +{{index "getTime method"}} + +Timestamps are stored as the number of +milliseconds since the start of 1970, using negative numbers for +times before 1970 (following a convention set by “((Unix time))”, +which was invented around that time). The `getTime` method on a date object +returns this number. It is big, as you can imagine. + +``` +console.log(new Date(2013, 11, 19).getTime()); +// → 1387407600000 +console.log(new Date(1387407600000)); +// → Thu Dec 19 2013 00:00:00 GMT+0100 (CET) +``` + +{{index "Date.now function", "Date constructor"}} + +If you give the `Date` +constructor a single argument, that argument is treated as such +a millisecond count. You can get the current millisecond count by +creating a new `Date` object and calling `getTime` on it but also by +calling the `Date.now` function. + +{{index "getFullYear method", "getMonth method", "getDate method", "getHours method", "getMinutes method", "getSeconds method", "getYear method"}} + +Date objects provide methods like +`getFullYear`, `getMonth`, `getDate`, `getHours`, `getMinutes`, and +`getSeconds` to extract their components. There's also `getYear`, +which gives you the year minus 1900 (such as `93` or `114`) and is +mostly useless. + +{{index "capture group"}} + +Putting ((parentheses)) around the parts of the +expression that we are interested in, we can now easily create a date +object from a string.
+ +``` +function findDate(string) { + var dateTime = /(\d{1,2})-(\d{1,2})-(\d{4})/; + var match = dateTime.exec(string); + return new Date(Number(match[3]), + Number(match[2]) - 1, + Number(match[1])); +} +console.log(findDate("30-1-2003")); +// → Thu Jan 30 2003 00:00:00 GMT+0100 (CET) +``` + +## Word and string boundaries + +{{index matching, ["regular expression", boundary]}} + +Unfortunately, +`findDate` will also happily extract the nonsensical date 00-1-3000 +from the string `"100-1-30000"`. A match may happen anywhere in the +string, so in this case, it'll just start at the second character and +end at the second-to-last character. + +{{index boundary, "caret character", "dollar sign"}} + +If we want to +enforce that the match must span the whole string, we can add the +markers `^` and `$`. The caret matches the start of the input string, +while the dollar sign matches the end. So, `/^\d+$/` matches a string +consisting entirely of one or more digits, `/^!/` matches any string +that starts with an exclamation mark, and `/x^/` does not match any +string (there cannot be an _x_ before the start of the string). + +{{index "word boundary", "word character"}} + +If, on the other hand, we just +want to make sure the date starts and ends on a word boundary, we can +use the marker `\b`. A word boundary can be the start or end of the +string or any point in the string that has a word character (as in +`\w`) on one side and a nonword character on the other. + +``` +console.log(/cat/.test("concatenate")); +// → true +console.log(/\bcat\b/.test("concatenate")); +// → false +``` + +{{index matching}} + +Note that a boundary marker doesn't represent an actual +character. It just enforces that the regular expression matches only +when a certain condition holds at the place where it appears in the +pattern. 
+ +## Choice patterns + +{{index branching, ["regular expression", alternatives], "farm example"}} + +Say we want to know whether a piece of text contains not +only a number but a number followed by one of the words _pig_, _cow_, +or _chicken_, or any of their plural forms. + +We could write three regular expressions and test them in turn, but +there is a nicer way. The ((pipe character)) (`|`) denotes a +((choice)) between the pattern to its left and the pattern to its +right. So I can say this: + +``` +var animalCount = /\b\d+ (pig|cow|chicken)s?\b/; +console.log(animalCount.test("15 pigs")); +// → true +console.log(animalCount.test("15 pigchickens")); +// → false +``` + +{{index parentheses}} + +Parentheses can be used to limit the part of the +pattern that the pipe operator applies to, and you can put multiple +such operators next to each other to express a choice between more +than two patterns. + +## The mechanics of matching + +{{index ["regular expression", matching], [matching, algorithm]}} + +Regular +expressions can be thought of as ((flow diagram))s. This is the +diagram for the livestock expression in the previous example: + +{{figure {url: "img/re_pigchickens.svg", alt: "Visualization of /\\b\\d+ (pig|cow|chicken)s?\\b/"}}} + +{{index traversal}} + +Our expression matches a string if we can find a path +from the left side of the diagram to the right side. We keep +a current position in the string, and every time we move through a +box, we verify that the part of the string after our current position +matches that box. + +So if we try to match `"the 3 pigs"` with our regular expression, +our progress through the flow chart would look like this: + + - At position 4, there is a word ((boundary)), so we can move past + the first box. + + - Still at position 4, we find a digit, so we can also move past the + second box. 
+ + - At position 5, one path loops back to before the second (digit) box, + while the other moves forward through the box that holds a single space + character. There is a space here, not a digit, so we must take the + second path. + + - We are now at position 6 (the start of “pigs”) and at the three-way + branch in the diagram. We don't see “cow” or “chicken” here, but we + do see “pig”, so we take that branch. + + - At position 9, after the three-way branch, one path skips + the _s_ box and goes straight to the final word boundary, while the other path + matches an _s_. There is an _s_ character here, not a word boundary, + so we go through the _s_ box. + + - We're at position 10 (the end of the string) and can match only a + word ((boundary)). The end of a string counts as a word boundary, + so we go through the last box and have successfully matched this + string. + +{{index ["regular expression", matching], [matching, algorithm], searching}} + +Conceptually, +a regular expression engine looks for a match in a string as follows: +it starts at the start of the string and tries a match there. In this +case, there _is_ a word boundary there, so it'd get past the first +box—but there is no digit, so it'd fail at the second box. Then it +moves on to the second character in the string and tries to begin a +new match there... and so on, until it finds a match or reaches the end +of the string and decides that there really is no match. + +{{id backtracking}} +## Backtracking + +{{index ["regular expression", backtracking], "binary number", "decimal number", "hexadecimal number", "flow diagram", [matching, algorithm], backtracking}} + +The regular +expression `/\b([01]+b|\d+|[\da-f]+h)\b/` matches either a binary +number followed by a _b_, a regular decimal number with no suffix +character, or a hexadecimal number (that is, base 16, with the letters +_a_ to _f_ standing for the digits 10 to 15) followed by an _h_. 
This +is the corresponding diagram: + +{{figure {url: "img/re_number.svg", alt: "Visualization of /\\b([01]+b|\\d+|[\\da-f]+h)\\b/"}}} + +{{index branching}} + +When matching this expression, it will often happen +that the top (binary) branch is entered even though the input does not +actually contain a binary number. When matching the string `"103"`, +for example, it becomes clear only at the 3 that we are in the wrong +branch. The string _does_ match the expression, just not the branch we +are currently in. + +{{index backtracking, searching}} + +So the matcher _backtracks_. When +entering a branch, it remembers its current position (in this +case, at the start of the string, just past the first boundary box in +the diagram) so that it can go back and try another branch if the +current one does not work out. For the string `"103"`, after +encountering the 3 character, it will start trying the branch for +decimal numbers. This one matches, so a match is reported after all. + +{{index [matching, algorithm]}} + +The matcher stops as soon as it finds a full +match. This means that if multiple branches could potentially match a +string, only the first one (ordered by where the branches appear in +the regular expression) is used. + +Backtracking also happens for ((repetition)) operators like `+` and `*`. +If you match `/^.*x/` against `"abcxe"`, the `.*` part will first try +to consume the whole string. The engine will then realize that it +needs an _x_ to match the pattern. Since there is no _x_ past the end +of the string, the star operator tries to match one character less. +But the matcher doesn't find an _x_ after `abcx` either, so it +backtracks again, matching the star operator to just `abc`. _Now_ it +finds an _x_ where it needs it and reports a successful match from +positions 0 to 4. + +{{index performance, complexity}} + +It is possible to write regular +expressions that will do a _lot_ of backtracking.
This problem occurs +when a pattern can match a piece of input in many different ways. For +example, if we get confused while writing a binary-number regular expression, we +might accidentally write something like `/([01]+)+b/`. + +{{figure {url: "img/re_slow.svg", alt: "Visualization of /([01]+)+b/",width: "6cm"}}} + +{{index "inner loop", [nesting, "in regexps"]}} + +If that tries to match some +long series of zeros and ones with no trailing _b_ character, the +matcher will first go through the inner loop until it runs out of +digits. Then it notices there is no _b_, so it backtracks one +position, goes through the outer loop once, and gives up again, trying +to backtrack out of the inner loop once more. It will continue to try +every possible route through these two loops. This means the amount of +work _doubles_ with each additional character. For even just a few +dozen characters, the resulting match will take practically forever. + +## The replace method + +{{index "replace method", "regular expression"}} + +String values have a +`replace` method, which can be used to replace part of the string +with another string. + +``` +console.log("papa".replace("p", "m")); +// → mapa +``` + +{{index ["regular expression", flags], ["regular expression", global]}} + +The first +argument can also be a regular expression, in which case the first +match of the regular expression is replaced. When a `g` option (for +_global_) is added to the regular expression, _all_ matches in the +string will be replaced, not just the first. + +``` +console.log("Borobudur".replace(/[ou]/, "a")); +// → Barobudur +console.log("Borobudur".replace(/[ou]/g, "a")); +// → Barabadar +``` + +{{index [interface, design], argument}} + +It would have been sensible if the +choice between replacing one match or all matches was made through an +additional argument to `replace` or by providing a different method, +`replaceAll`. 
But for some unfortunate reason, the choice relies on a +property of the regular expression instead. + +{{index grouping, "capture group", "dollar sign", "replace method", ["regular expression", grouping]}} + +The real power of using +regular expressions with `replace` comes from the fact that we can +refer back to matched groups in the replacement string. For example, +say we have a big string containing the names of people, one name per +line, in the format `Lastname, Firstname`. If we want to swap these +names and remove the comma to get a simple `Firstname Lastname` +format, we can use the following code: + +``` +console.log( + "Hopper, Grace\nMcCarthy, John\nRitchie, Dennis" + .replace(/([\w ]+), ([\w ]+)/g, "$2 $1")); +// → Grace Hopper +// John McCarthy +// Dennis Ritchie +``` + +The `$1` and `$2` in the replacement string refer to the parenthesized +groups in the pattern. `$1` is replaced by the text that matched +against the first group, `$2` by the second, and so on, up to `$9`. +The whole match can be referred to with `$&`. + +{{index [function, "higher-order"], grouping, "capture group"}} + +It is also +possible to pass a function, rather than a string, as the second +argument to `replace`. For each replacement, the function will be +called with the matched groups (as well as the whole match) as +arguments, and its return value will be inserted into the new string. 
+ +Here's a simple example: + +``` +var s = "the cia and fbi"; +console.log(s.replace(/\b(fbi|cia)\b/g, function(str) { + return str.toUpperCase(); +})); +// → the CIA and FBI +``` + +And here's a more interesting one: + +``` +var stock = "1 lemon, 2 cabbages, and 101 eggs"; +function minusOne(match, amount, unit) { + amount = Number(amount) - 1; + if (amount == 1) // only one left, remove the 's' + unit = unit.slice(0, unit.length - 1); + else if (amount == 0) + amount = "no"; + return amount + " " + unit; +} +console.log(stock.replace(/(\d+) (\w+)/g, minusOne)); +// → no lemon, 1 cabbage, and 100 eggs +``` + +This takes a string, finds all occurrences of a number followed by an +alphanumeric word, and returns a string wherein every such occurrence +is decremented by one. + +The `(\d+)` group ends up as the `amount` argument to the function, +and the `(\w+)` group gets bound to `unit`. The function converts +`amount` to a number—which always works, since it matched `\d+`—and +makes some adjustments in case there is only one or zero left. + +## Greed + +{{index greed, "regular expression"}} + +It isn't hard to use `replace` to +write a function that removes all ((comment))s from a piece of +JavaScript ((code)). Here is a first attempt: + +{{test wrap}} + +``` +function stripComments(code) { + return code.replace(/\/\/.*|\/\*[^]*\*\//g, ""); +} +console.log(stripComments("1 + /* 2 */3")); +// → 1 + 3 +console.log(stripComments("x = 10;// ten!")); +// → x = 10; +console.log(stripComments("1 /* a */+/* b */ 1")); +// → 1 1 +``` + +{{index "period character", "slash character", "newline character", "empty set", "block comment", "line comment"}} + +The +part before the _or_ operator simply matches two slash characters +followed by any number of non-newline characters. The part for +multiline comments is more involved. We use `[^]` (any character that +is not in the empty set of characters) as a way to match any +character. 
We cannot just use a dot here because block comments can +continue on a new line, and dots do not match the newline character. + +But the output of the previous example appears to have gone wrong. Why? + +{{index backtracking, greed, "regular expression"}} + +The `[^]*` part of +the expression, as I described in the section on backtracking, will +first match as much as it can. If that causes the next part of the +pattern to fail, the matcher moves back one character and tries again +from there. In the example, the matcher first tries to match the whole +rest of the string and then moves back from there. It will find an +occurrence of `*/` after going back four characters and match that. +This is not what we wanted—the intention was to match a single +comment, not to go all the way to the end of the code and find the end +of the last block comment. + +Because of this behavior, we say the repetition operators (`+`, `*`, +`?`, and `{}`) are _((greed))y_, meaning they match as much as they +can and backtrack from there. If you put a ((question mark)) after +them (`+?`, `*?`, `??`, `{}?`), they become nongreedy and start by +matching as little as possible, matching more only when the remaining +pattern does not fit the smaller match. + +And that is exactly what we want in this case. By having the star +match the smallest stretch of characters that brings us to a `*/`, +we consume one block comment and nothing more. + +{{test wrap}} + +``` +function stripComments(code) { + return code.replace(/\/\/.*|\/\*[^]*?\*\//g, ""); +} +console.log(stripComments("1 /* a */+/* b */ 1")); +// → 1 + 1 +``` + +A lot of ((bug))s in ((regular expression)) programs can be traced to +unintentionally using a greedy operator where a nongreedy one would +work better. When using a ((repetition)) operator, consider the +nongreedy variant first. 
+ +## Dynamically creating RegExp objects + +{{index ["regular expression", creation], "underscore character", "RegExp constructor"}} + +There are cases where you might not know the exact +((pattern)) you need to match against when you are writing your code. +Say you want to look for the user's name in a piece of text and +enclose it in underscore characters to make it stand out. Since you +will know the name only once the program is actually running, you +can't use the slash-based notation. + +But you can build up a string and use the `RegExp` ((constructor)) on +that. Here's an example: + +``` +var name = "harry"; +var text = "Harry is a suspicious character."; +var regexp = new RegExp("\\b(" + name + ")\\b", "gi"); +console.log(text.replace(regexp, "_$1_")); +// → _Harry_ is a suspicious character. +``` + +{{index ["regular expression", flags], "backslash character"}} + +When creating +the `\b` ((boundary)) markers, we have to use two backslashes because +we are writing them in a normal string, not a slash-enclosed regular +expression. The second argument to the `RegExp` constructor contains +the options for the regular expression—in this case `"gi"` for global +and case-insensitive. + +But what if the name is `"dea+hl[]rd"` because our user is a ((nerd))y +teenager? That would result in a nonsensical regular expression, which +won't actually match the user's name. + +{{index "backslash character", [escaping, "in regexps"], ["regular expression", escaping]}} + +To work around this, we can add backslashes +before any character that we don't trust. Adding backslashes before +alphabetic characters is a bad idea because things like `\b` and `\n` +have a special meaning. But escaping everything that's not +alphanumeric or ((whitespace)) is safe. 
+ +``` +var name = "dea+hl[]rd"; +var text = "This dea+hl[]rd guy is super annoying."; +var escaped = name.replace(/[^\w\s]/g, "\\$&"); +var regexp = new RegExp("\\b(" + escaped + ")\\b", "gi"); +console.log(text.replace(regexp, "_$1_")); +// → This _dea+hl[]rd_ guy is super annoying. +``` + +## The search method + +{{index searching, ["regular expression", methods], "indexOf method", "search method"}} + +The `indexOf` method on strings cannot be +called with a regular expression. But there is another method, +`search`, which does expect a regular expression. Like `indexOf`, it +returns the first index on which the expression was found, or -1 when +it wasn't found. + +``` +console.log(" word".search(/\S/)); +// → 2 +console.log(" ".search(/\S/)); +// → -1 +``` + +Unfortunately, there is no way to indicate that the match should start +at a given offset (like we can with the second argument to `indexOf`), +which would often be useful. + +## The lastIndex property + +{{index "exec method", "regular expression"}} + +The `exec` method similarly +does not provide a convenient way to start searching from a given +position in the string. But it does provide an _in_convenient way. + +{{index ["regular expression", matching], matching, "source property", "lastIndex property"}} + +Regular expression objects have +properties. One such property is `source`, which contains the string +that expression was created from. Another property is `lastIndex`, +which controls, in some limited circumstances, where the next match +will start. + +{{index [interface, design], "exec method", ["regular expression", global]}} + +Those circumstances are that the regular +expression must have the global (`g`) option enabled, and the match +must happen through the `exec` method. Again, a more sane solution +would have been to just allow an extra argument to be passed to +`exec`, but sanity is not a defining characteristic of JavaScript's +regular expression interface. 
+ +``` +var pattern = /y/g; +pattern.lastIndex = 3; +var match = pattern.exec("xyzzy"); +console.log(match.index); +// → 4 +console.log(pattern.lastIndex); +// → 5 +``` + +{{index "side effect", "lastIndex property"}} + +If the match was successful, +the call to `exec` automatically updates the `lastIndex` property to +point after the match. If no match was found, `lastIndex` is set back +to zero, which is also the value it has in a newly constructed regular +expression object. + +{{index bug}} + +When using a global regular expression value for multiple +`exec` calls, these automatic updates to the `lastIndex` property can +cause problems. Your regular expression might be accidentally starting +at an index that was left over from a previous call. + +``` +var digit = /\d/g; +console.log(digit.exec("here it is: 1")); +// → ["1"] +console.log(digit.exec("and now: 1")); +// → null +``` + +{{index ["regular expression", global], "match method"}} + +Another interesting +effect of the global option is that it changes the way the `match` +method on strings works. When called with a global expression, instead +of returning an array similar to that returned by `exec`, `match` will +find _all_ matches of the pattern in the string and return an array +containing the matched strings. + +``` +console.log("Banana".match(/an/g)); +// → ["an", "an"] +``` + +So be cautious with global regular expressions. The cases where they +are necessary—calls to `replace` and places where you want to +explicitly use _lastIndex_—are typically the only places where you +want to use them. + +### Looping over matches + +{{index "lastIndex property", "exec method", loop}} + +A common pattern is +to scan through all occurrences of a pattern in a string, in a way +that gives us access to the match object in the loop body, by using +`lastIndex` and `exec`. + +``` +var input = "A string with 3 numbers in it... 
42 and 88."; +var number = /\b(\d+)\b/g; +var match; +while (match = number.exec(input)) + console.log("Found", match[1], "at", match.index); +// → Found 3 at 14 +// Found 42 at 33 +// Found 88 at 40 +``` + +{{index "while loop", "= operator"}} + +This makes use of the fact that the +value of an ((assignment)) expression (`=`) is the assigned value. So +by using `match = number.exec(input)` as the condition in the `while` +statement, we perform the match at the start of each iteration, save +its result in a ((variable)), and stop looping when no more matches +are found. + +{{id ini}} +## Parsing an INI file + +{{index comment, "file format", "enemies example", "ini file"}} + +To +conclude the chapter, we'll look at a problem that calls for ((regular +expression))s. Imagine we are writing a program to automatically +harvest information about our enemies from the ((Internet)). (We will +not actually write that program here, just the part that reads the +((configuration)) file. Sorry to disappoint.) The configuration file +looks like this: + +```text/plain +searchengine=http://www.google.com/search?q=$1 +spitefulness=9.7 + +; comments are preceded by a semicolon... +; each section concerns an individual enemy +[larry] +fullname=Larry Doe +type=kindergarten bully +website=http://www.geocities.com/CapeCanaveral/11451 + +[gargamel] +fullname=Gargamel +type=evil sorcerer +outputdir=/home/marijn/enemies/gargamel +``` + +{{index grammar}} + +The exact rules for this format (which is actually a +widely used format, usually called an _INI_ file) are as follows: + +- Blank lines and lines starting with semicolons are ignored. + +- Lines wrapped in `[` and `]` start a new ((section)). + +- Lines containing an alphanumeric identifier followed by an `=` + character add a setting to the current section. + +- Anything else is invalid. + +Our task is to convert a string like this into an array of objects, +each with a `name` property and an array of settings. 
We'll need one +such object for each section and one for the global settings at the +top. + +{{index "carriage return", "line break", "newline character"}} + +Since the +format has to be processed ((line)) by line, splitting up the file +into separate lines is a good start. We used `string.split("\n")` to +do this in [Chapter 6](06_object.html#split). Some operating +systems, however, use not just a newline character to separate lines +but a carriage return character followed by a newline (`"\r\n"`). +Given that the `split` method also allows a regular expression as its +argument, we can split on a regular expression like `/\r?\n/` to split +in a way that allows both `"\n"` and `"\r\n"` between lines. + +``` +function parseINI(string) { + // Start with an object to hold the top-level fields + var currentSection = {name: null, fields: []}; + var categories = [currentSection]; + + string.split(/\r?\n/).forEach(function(line) { + var match; + if (/^\s*(;.*)?$/.test(line)) { + return; + } else if (match = line.match(/^\[(.*)\]$/)) { + currentSection = {name: match[1], fields: []}; + categories.push(currentSection); + } else if (match = line.match(/^(\w+)=(.*)$/)) { + currentSection.fields.push({name: match[1], + value: match[2]}); + } else { + throw new Error("Line '" + line + "' is invalid."); + } + }); + + return categories; +} +``` + +{{index "parseINI function", parsing}} + +This code goes over every line in +the file, updating the “current section” object as it goes along. +First, it checks whether the line can be ignored, using the expression +`/^\s*(;.*)?$/`. Do you see how it works? The part between the +((parentheses)) will match comments, and the `?` will make sure it +also matches lines containing only whitespace. + +If the line is not a ((comment)), the code then checks whether the +line starts a new ((section)). If so, it creates a new current section +object, to which subsequent settings will be added. 
+ +The last meaningful possibility is that the line is a normal setting, +which the code adds to the current section object. + +If a ((line)) matches none of these forms, the function throws an +error. + +{{index "caret character", "dollar sign", boundary}} + +Note the recurring +use of `^` and `$` to make sure the expression matches the whole line, +not just part of it. Leaving these out results in code that mostly +works but behaves strangely for some input, which can be a difficult +bug to track down. + +{{index "if keyword", assignment, "= operator"}} + +The pattern `if (match += string.match(...))` is similar to the trick of using an assignment +as the condition for `while`. You often aren't sure that your call to +`match` will succeed, so you can access the resulting object only +inside an `if` statement that tests for this. To not break the +pleasant chain of `if` forms, we assign the result of the match to a +variable and immediately use that assignment as the test in the `if` +statement. + +## International characters + +{{index internationalization, Unicode, ["regular expression", internationalization]}} + +Because of JavaScript's initial +simplistic implementation and the fact that this simplistic approach +was later set in stone as ((standard)) behavior, JavaScript's regular +expressions are rather dumb about characters that do not appear in the +English language. For example, as far as JavaScript's regular +expressions are concerned, a “((word character))” is only one of the +26 characters in the Latin alphabet (uppercase or lowercase), a decimal digit, or, for +some reason, the underscore character. Things like _é_ or _β_, which +most definitely are word characters, will not match `\w` (and _will_ +match uppercase `\W`, the nonword category).
+ +{{index whitespace}} + +By a strange historical accident, `\s` (whitespace) +does not have this problem and matches all characters that the +Unicode standard considers whitespace, including things like the +((nonbreaking space)) and the ((Mongolian vowel separator)). + +{{index "character category"}} + +Some ((regular expression)) +((implementation))s in other programming languages have syntax to +match specific ((Unicode)) character categories, such as “all +uppercase letters”, “all punctuation”, or “control characters”. There +are plans to add support for such categories to JavaScript, but it +unfortunately looks like they won't be realized in the near ((future)). + +{{id summary_regexp}} +## Summary + +Regular expressions are objects that represent patterns in strings. +They use their own syntax to express these patterns. + +[cols="1,5"] +|==== +|`/abc/` |A sequence of characters +|`/[abc]/` |Any character from a set of characters +|`/[^abc]/` |Any character _not_ in a set of characters +|`/[0-9]/` |Any character in a range of characters +|`/x+/` |One or more occurrences of the pattern `x` +|`/x+?/` |One or more occurrences, nongreedy +|`/x*/` |Zero or more occurrences +|`/x?/` |Zero or one occurrence +|`/x{2,4}/` |Between two and four occurrences +|`/(abc)/` |A group +|_/a{brvbar}b{brvbar}c/_ |Any one of several patterns +|`/\d/` |Any digit character +|`/\w/` |An alphanumeric character (“word character”) +|`/\s/` |Any whitespace character +|`/./` |Any character except newlines +|`/\b/` |A word boundary +|`/^/` |Start of input +|`/$/` |End of input +|==== + +A regular expression has a method `test` to test whether a given +string matches it. It also has an `exec` method that, when a match is +found, returns an array containing all matched groups. Such an array +has an `index` property that indicates where the match started. 
+ +Strings have a `match` method to match them against a regular +expression and a `search` method to search for one, returning only the +starting position of the match. Their `replace` method can replace +matches of a pattern with a replacement string. Alternatively, you can +pass a function to `replace`, which will be used to build up a +replacement string based on the match text and matched groups. + +Regular expressions can have options, which are written after +the closing slash. The `i` option makes the match case insensitive, +while the `g` option makes the expression _global_, which, among other +things, causes the `replace` method to replace all instances instead +of just the first. + +The `RegExp` constructor can be used to create a regular expression +value from a string. + +Regular expressions are a sharp ((tool)) with an awkward handle. They +simplify some tasks tremendously but can quickly become unmanageable +when applied to complex problems. Part of knowing how to use them is +resisting the urge to try to shoehorn things that they cannot sanely +express into them. + +## Exercises + +{{index debugging, bug}} + +It is almost unavoidable that, in the course +of working on these exercises, you will get confused and frustrated by +some regular expression's inexplicable ((behavior)). Sometimes it +helps to enter your expression into an online tool like +[_debuggex.com_](https://www.debuggex.com/) to see whether its +visualization corresponds to what you intended and to ((experiment)) +with the way it responds to various input strings. + +### Regexp golf + +{{index "program size", "code golf", "regexp golf (exercise)"}} + +_Code +golf_ is a term used for the game of trying to express a particular +program in as few characters as possible. Similarly, _regexp golf_ is +the practice of writing as tiny a regular expression as possible to +match a given pattern, and _only_ that pattern.
+ +{{index boundary, matching}} + +For each of the following items, write a ((regular +expression)) to test whether any of the given substrings occur in a +string. The regular expression should match only strings containing +one of the substrings described. Do not worry about word boundaries +unless explicitly mentioned. When your expression works, see whether you +can make it any smaller. + + 1. _car_ and _cat_ + 2. _pop_ and _prop_ + 3. _ferret_, _ferry_, and _ferrari_ + 4. Any word ending in _ious_ + 5. A whitespace character followed by a dot, comma, colon, or semicolon + 6. A word longer than six letters + 7. A word without the letter _e_ + +Refer to the table in the +[chapter summary](09_regexp.html#summary_regexp) for help. Test each +solution with a few test strings. + +{{if interactive +``` +// Fill in the regular expressions + +verify(/.../, + ["my car", "bad cats"], + ["camper", "high art"]); + +verify(/.../, + ["pop culture", "mad props"], + ["plop"]); + +verify(/.../, + ["ferret", "ferry", "ferrari"], + ["ferrum", "transfer A"]); + +verify(/.../, + ["how delicious", "spacious room"], + ["ruinous", "consciousness"]); + +verify(/.../, + ["bad punctuation ."], + ["escape the dot"]); + +verify(/.../, + ["hottentottententen"], + ["no", "hotten totten tenten"]); + +verify(/.../, + ["red platypus", "wobbling nest"], + ["earth bed", "learning ape"]); + + +function verify(regexp, yes, no) { + // Ignore unfinished exercises + if (regexp.source == "...") return; + yes.forEach(function(s) { + if (!regexp.test(s)) + console.log("Failure to match '" + s + "'"); + }); + no.forEach(function(s) { + if (regexp.test(s)) + console.log("Unexpected match for '" + s + "'"); + }); +} +``` +if}} + +### Quoting style + +{{index "quoting style (exercise)", "single-quote character", "double-quote character"}} + +Imagine you have written a +story and used single ((quotation mark))s throughout to mark pieces +of dialogue. 
Now you want to replace all the dialogue quotes with +double quotes, while keeping the single quotes used in contractions +like _aren't_. + +{{index "replace method"}} + +Think of a pattern that distinguishes these two +kinds of quote usage and craft a call to the `replace` method that +does the proper replacement. + +{{if interactive +{{test no}} + +``` +var text = "'I'm the cook,' he said, 'it's my job.'"; +// Change this call. +console.log(text.replace(/A/g, "B")); +// → "I'm the cook," he said, "it's my job." +``` +if}} + +{{hint + +{{index "quoting style (exercise)", boundary}} + +The most obvious solution +is to only replace quotes with a nonword character on at least one +side. Something like `/\W'|'\W/`. But you also have to take the start +and end of the line into account. + +{{index grouping, "replace method"}} + +In addition, you must ensure that +the replacement also includes the characters that were matched by the +`\W` pattern so that those are not dropped. This can be done by +wrapping them in ((parentheses)) and including their groups in the +replacement string (`$1`, `$2`). Groups that are not matched will be +replaced by nothing. + +hint}} + +### Numbers again + +{{index number}} + +A series of ((digit))s can be matched by the simple +regular expression `/\d+/`. + +{{index sign, "fractional number", syntax, minus, "plus character", exponent, "scientific notation", "period character"}} + +Write an expression that matches only JavaScript-style +numbers. It must support an optional minus _or_ plus sign in front of +the number, the decimal dot, and exponent notation—`5e-3` or _1E10_— +again with an optional sign in front of the exponent. Also note that +it is not necessary for there to be digits in front of or after the +dot, but the number cannot be a dot alone. That is, `.5` and `5.` +are valid JavaScript numbers, but a lone dot _isn't_. + +{{if interactive +{{test no}} + +``` +// Fill in this regular expression. 
+var number = /^...$/; + +// Tests: +["1", "-1", "+15", "1.55", ".5", "5.", "1.3e2", "1E-4", + "1e+12"].forEach(function(s) { + if (!number.test(s)) + console.log("Failed to match '" + s + "'"); +}); +["1a", "+-1", "1.2.3", "1+1", "1e4.5", ".5.", "1f5", + "."].forEach(function(s) { + if (number.test(s)) + console.log("Incorrectly accepted '" + s + "'"); +}); +``` +if}} + +{{hint + +{{index ["regular expression", escaping], "backslash character"}} + +First, do +not forget the backslash in front of the dot. + +Matching the optional ((sign)) in front of the ((number)), as well as +in front of the ((exponent)), can be done with `[+\-]?` or `(\+|-|)` +(plus, minus, or nothing). + +{{index "pipe character"}} + +The more complicated part of the exercise is the +problem of matching both `"5."` and `".5"` without also matching +`"."`. For this, a good solution is to use the `|` operator to +separate the two cases—either one or more digits optionally followed +by a dot and zero or more digits _or_ a dot followed by one or more +digits. + +{{index exponent, "case sensitivity", ["regular expression", flags]}} + +Finally, to make the _e_ case-insensitive, either +add an `i` option to the regular expression or use `[eE]`. + +hint}} + diff --git a/10_modules.md b/10_modules.md new file mode 100644 index 000000000..9b1bc92be --- /dev/null +++ b/10_modules.md @@ -0,0 +1,1041 @@ +{{meta {chap_num: 10, prev_link: 09_regexp, next_link: 11_language, load_files: ["code/chapter/10_modules.js", "code/loadfile.js"]}}} + +# Modules + +{{if interactive + +{{quote {author: "Master Yuan-Ma", title: "The Book of Programming", chapter: true} + +A beginning programmer writes her programs like an ant builds her +hill, one piece at a time, without thought for the bigger structure. +Her programs will be like loose sand. They may stand for a while, but +growing too big they fall apart. + +Realizing this problem, the programmer will start to spend a lot of +time thinking about structure. 
Her programs will be rigidly +structured, like rock sculptures. They are solid, but when they must +change, violence must be done to them. + +The master programmer knows when to apply structure and when to leave +things in their simple form. Her programs are like clay, solid yet +malleable. + +quote}} + +if}} + +{{index organization, "code structure"}} + +Every program has a shape. On +a small scale, this shape is determined by its division into +((function))s and the blocks inside those functions. Programmers have +a lot of freedom in the way they structure their programs. Shape follows +more from the ((taste)) of the programmer than from the program's +intended functionality. + +{{index readability}} + +When looking at a larger program in its entirety, +individual functions start to blend into the background. Such a +program can be made more readable if we have a larger unit of +organization. + +_Modules_ divide programs into clusters of code that, by _some_ +criterion, belong together. This chapter explores some of the benefits +that such division provides and shows techniques for building +((module))s in JavaScript. + +## Why modules help + +{{index "book analogy", organization}} + +There are a number of reasons why +authors divide their books into ((chapter))s and sections. These +divisions make it easier for a reader to see how the book is built up +and to find specific parts that they are interested in. They also help +the _author_ by providing a clear focus for every section. + +The benefits of organizing a program into several ((file))s or +((module))s are similar. Structure helps people who aren't yet +familiar with the code find what they are looking for and makes it +easier for the programmer to keep things that are related +close together. 
+ +{{index "project chapter", readability, interconnection}} + +Some +programs are even organized along the model of a traditional ((text)), +with a well-defined order in which the reader is encouraged to go +through the program and with lots of prose (comments) providing a coherent +description of the code. This makes reading the program a lot less +intimidating—reading unknown code is usually intimidating—but has the +downside of being more work to set up. It also makes the program more +difficult to change because prose tends to be more tightly +interconnected than code. This style is called _((literate +programming))_. The “project” chapters of this book can be considered +literate programs. + +{{index minimalism, evolution, structure, organization}} + +As a +general rule, structuring things costs energy. In the early stages of +a project, when you are not quite sure yet what goes where or what +kind of ((module))s the program needs at all, I endorse a minimalist, +structureless attitude. Just put everything wherever it is convenient +to put it until the code stabilizes. That way, you won't be wasting +time moving pieces of the program back and forth, and you won't +accidentally lock yourself into a structure that does not actually fit +your program. + +### Namespacing + +{{index encapsulation, isolation, "global scope", "local scope"}} + +Most modern ((programming language))s have a +((scope)) level between _global_ (everyone can see it) and _local_ +(only this function can see it). JavaScript does not. Thus, by +default, everything that needs to be visible outside of the scope of a +top-level function is visible _everywhere_. 
+ +{{index "namespace pollution"}} + +Namespace pollution, the problem of a lot of +unrelated code having to share a single set of global variable names, +was mentioned in [Chapter 4](04_data.html#namespace_pollution), +where the `Math` object was given as an example of an object that acts +like a module by grouping math-related functionality. + +{{index [function, "as namespace"]}} + +Though JavaScript provides no actual +((module)) construct yet, objects can be used to create publicly +accessible sub((namespace))s, and functions can be used to create an +isolated, private namespace inside of a module. Later in this chapter, +I will discuss a way to build reasonably convenient, namespace-isolating +modules on top of the primitive concepts that JavaScript gives us. + +### Reuse + +{{index "version control", bug, "copy-paste programming", "ini file", dependency, structure}} + +In a “flat” project, which isn't +structured as a set of ((module))s, it is not apparent which parts of +the code are needed to use a particular function. In my program for +spying on my enemies (see [Chapter 9](09_regexp.html#ini)), I wrote +a function for reading configuration files. If I want to use that +function in another project, I must go and copy out the parts of the +old program that look like they are relevant to the functionality that +I need and paste them into my new program. Then, if I find a mistake +in that code, I'll fix it only in whichever program that I'm working +with at the time and forget to also fix it in the other program. + +{{index duplication}} + +Once you have lots of such shared, duplicated pieces +of code, you will find yourself wasting a lot of time and energy on +moving them around and keeping them up-to-date. 
+ +{{index reuse}} + +Putting pieces of functionality that stand on their own +into separate files and modules makes them easier to track, update, +and share because all the various pieces of code that want to use the +module load it from the same actual file. + +{{index dependency, library, installation, upgrading}} + +This +idea gets even more powerful when the relations between modules—which +other modules each module depends on—are explicitly stated. You can +then automate the process of installing and upgrading external modules +(_libraries_). + +{{index "package manager", download, reuse}} + +Taking this idea even +further, imagine an online service that tracks and distributes +hundreds of thousands of such libraries, allowing you to search for +the functionality you need and, once you find it, set up your project +to automatically download it. + +{{index NPM}} + +{{id modules_npm}} +This service exists. It is called NPM +([_npmjs.org_](http://npmjs.org)). NPM consists of an online database of +modules and a tool for downloading and upgrading the modules your +program depends on. It grew out of ((Node.js)), the browserless +JavaScript environment we will discuss in +[Chapter 20](20_node.html#node), but can also be useful when +programming for the browser. + +### Decoupling + +{{index isolation, decoupling, "backward compatibility"}} + +Another important role of modules is isolating pieces +of code from each other, in the same way that the object interfaces +from [Chapter 6](06_object.html#interface) do. A well-designed +module will provide an interface for external code to use. As the +module gets updated with ((bug)) fixes and new functionality, the +existing ((interface)) stays the same (it is _stable_) so that other +modules can use the new, improved version without any changes to +themselves. + +{{index stability}} + +Note that a stable interface does not mean no new +functions, methods, or variables are added.
It just means that +existing functionality isn't removed and its meaning is not changed. + +{{index "implementation detail", encapsulation}} + +A good ((module)) +((interface)) should allow the module to grow without breaking the old +interface. This means exposing as few of the module's internal +concepts as possible while also making the “language” that the +interface exposes powerful and flexible enough to be applicable in a +wide range of situations. + +{{index [interface, design]}} + +For interfaces that expose a single, focused +concept, such as a configuration file reader, this design comes +naturally. For others, such as a text editor, which has many different +aspects that external code might need to access (content, styling, +user actions, and so on), it requires careful design. + +## Using functions as namespaces + +{{index namespace, [function, "as namespace"]}} + +Functions are the only things in +JavaScript that create a new ((scope)). So if we want our ((module))s +to have their own scope, we will have to base them on functions. + +{{index "weekday example", "Date type", "getDay method"}} + +Consider this +trivial module for associating names with day-of-the-week numbers, as +returned by a `Date` object's `getDay` method: + +``` +var names = ["Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"]; +function dayName(number) { + return names[number]; +} + +console.log(dayName(1)); +// → Monday +``` + +{{index "access control", encapsulation}} + +The `dayName` function is part +of the module's ((interface)), but the `names` variable is not. We +would prefer _not_ to spill it into the ((global scope)). 
+ +We can do this: + +``` +var dayName = function() { + var names = ["Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"]; + return function(number) { + return names[number]; + }; +}(); + +console.log(dayName(3)); +// → Wednesday +``` + +{{index "anonymous function"}} + +Now `names` is a local variable in an +(unnamed) function. This function is created and immediately called, +and its return value (the actual `dayName` function) is stored in a +variable. We could have pages and pages of code in this function, with +100 local variables, and they would all be internal to our +module—visible to the module itself but not to outside code. + +{{index isolation, "side effect"}} + +We can use a similar pattern to +isolate code from the outside world entirely. The following module logs a +value to the console but does not actually provide any values for +other modules to use: + +``` +(function() { + function square(x) { return x * x; } + var hundred = 100; + + console.log(square(hundred)); +})(); +// → 10000 +``` + +{{index "namespace pollution"}} + +This code simply outputs the square of 100, +but in the real world it could be a module that adds a method +to some ((prototype)) or sets up a widget on a web page. It is +wrapped in a function to prevent the variables it uses internally from +polluting the ((global scope)). + +{{index parsing, "function keyword"}} + +Why did we wrap the namespace +function in a pair of ((parentheses))? This has to do with a quirk in +JavaScript's ((syntax)). If an _((expression))_ starts with the +keyword `function`, it is a function expression. However, if a +_((statement))_ starts with `function`, it is a function +_declaration_, which requires a name and, not being an expression, +cannot be called by writing parentheses after it. You can think of the +extra wrapping parentheses as a trick to force the function to be +interpreted as an expression. 
+ +## Objects as interfaces + +{{index interface}} + +Now imagine that we want to add another function to our +day-of-the-week module, one that goes from a day name to a +number. We can't simply return the function anymore but must wrap the +two functions in an object. + +``` +var weekDay = function() { + var names = ["Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"]; + return { + name: function(number) { return names[number]; }, + number: function(name) { return names.indexOf(name); } + }; +}(); + +console.log(weekDay.name(weekDay.number("Sunday"))); +// → Sunday +``` + +{{index exporting, "exports object", this}} + +For bigger ((module))s, +gathering all the _exported_ values into an object at the end of the +function becomes awkward since many of the exported functions are +likely to be big and you'd prefer to write them somewhere else, near +related internal code. A convenient alternative is to declare an +object (conventionally named `exports`) and add properties to that +whenever we are defining something that needs to be exported. In the +following example, the module function takes its interface object as +an argument, allowing code outside of the function to create it and store +it in a variable. (Outside of a function, `this` refers to the global +scope object.) + +``` +(function(exports) { + var names = ["Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"]; + + exports.name = function(number) { + return names[number]; + }; + exports.number = function(name) { + return names.indexOf(name); + }; +})(this.weekDay = {}); + +console.log(weekDay.name(weekDay.number("Saturday"))); +// → Saturday +``` + +## Detaching from the global scope + +{{index [variable, global]}} + +The previous pattern is commonly used by JavaScript +modules intended for the ((browser)). The module will claim a single +global variable and wrap its code in a function in order to have its +own private ((namespace)). 
But this pattern still causes problems if +multiple modules happen to claim the same name or if you want to load +two ((version))s of a module alongside each other. + +{{index "module loader", "require function", CommonJS, dependency}} + +With a little plumbing, we +can create a system that allows one ((module)) to directly ask for the +((interface)) object of another module, without going through the +global scope. Our goal is a `require` function that, when given a +module name, will load that module's file (from disk or the Web, +depending on the platform we are running on) and return the +appropriate interface value. + +This approach solves the problems mentioned previously and has the added +benefit of making your program's dependencies explicit, making it +harder to accidentally make use of some module without stating that +you need it. + +{{index "readFile function", "require function"}} + +For `require` we need two +things. First, we want a function `readFile`, which returns the +content of a given file as a string. (A single such function is not +present in ((standard)) JavaScript, but different JavaScript +environments, such as the browser and Node.js, provide their own ways +of accessing ((file))s. For now, let's just pretend we have this +function.) Second, we need to be able to actually execute this +string as JavaScript code. + +{{id eval}} +## Evaluating data as code + +{{index evaluation, interpretation}} + +There are several ways to take +data (a string of code) and run it as part of the current program. + +{{index isolation, eval}} + +The most obvious way is the special operator +`eval`, which will execute a string of code in the _current_ scope. +This is usually a bad idea because it breaks some of the sane +properties that scopes normally have, such as being isolated from the +outside world. 
+ +``` +function evalAndReturnX(code) { + eval(code); + return x; +} + +console.log(evalAndReturnX("var x = 2")); +// → 2 +``` + +{{index "Function constructor"}} + +A better way of interpreting data as code is +to use the `Function` constructor. This takes two arguments: a string +containing a comma-separated list of argument names and a string +containing the function's body. + +``` +var plusOne = new Function("n", "return n + 1;"); +console.log(plusOne(4)); +// → 5 +``` + +This is precisely what we need for our modules. We can wrap a module's +code in a function, with that function's scope becoming our module +((scope)). + +{{id commonjs}} +## Require + +{{index "require function", CommonJS}} + +The following is a minimal +implementation of `require`: + +{{test wrap}} + +``` +function require(name) { + var code = new Function("exports", readFile(name)); + var exports = {}; + code(exports); + return exports; +} + +console.log(require("weekDay").name(1)); +// → Monday +``` + +{{index "weekday example", "exports object", "Function constructor"}} + +Since the `new Function` constructor wraps the module +code in a function, we don't have to write a wrapping ((namespace)) +function in the module file itself. And since we make `exports` an +argument to the module function, the module does not have to declare +it. This removes a lot of clutter from our example module. + +``` +var names = ["Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"]; + +exports.name = function(number) { + return names[number]; +}; +exports.number = function(name) { + return names.indexOf(name); +}; +``` + +{{index "require function"}} + +When using this pattern, a ((module)) typically +starts with a few variable declarations that load the modules it +depends on. 
+ +{{test no}} + +``` +var weekDay = require("weekDay"); +var today = require("today"); + +console.log(weekDay.name(today.dayNumber())); +``` + +{{index efficiency}} + +The simplistic implementation of `require` given previously +has several problems. For one, it will load and run a module every +time it is _require_d, so if several modules have the same +dependency or a `require` call is put inside a function that will +be called multiple times, time and energy will be wasted. + +{{index cache}} + +This can be solved by storing the modules that have already +been loaded in an object and simply returning the existing value when +one is loaded multiple times. + +{{index "exports object", exporting}} + +The second problem is that it is +not possible for a module to directly export a value other than the +`exports` object, such as a function. For example, a module might want +to export only the constructor of the object type it defines. Right +now, it cannot do that because `require` always uses the `exports` +object it creates as the exported value. + +{{index "module object"}} + +The traditional solution for this is to provide +modules with another variable, `module`, which is an object that has a +property `exports`. This property initially points at the empty object +created by `require` but can be overwritten with another value in +order to export something else. + +{{test wrap}} +// include_code + +``` +function require(name) { + if (name in require.cache) + return require.cache[name]; + + var code = new Function("exports, module", readFile(name)); + var exports = {}, module = {exports: exports}; + code(exports, module); + + require.cache[name] = module.exports; + return module.exports; +} +require.cache = Object.create(null); +``` + +{{index "require function"}} + +We now have a module system that uses a single +global variable (`require`) to allow modules to find and use each +other without going through the ((global scope)). 
+ +This style of module system is called _((CommonJS)) modules_, after +the pseudo-((standard)) that first specified it. It is built into the +((Node.js)) system. Real implementations do a lot more than the +example I showed. Most importantly, they have a much more intelligent +way of going from a module name to an actual piece of code, allowing +both pathnames relative to the current file and module names that +point directly to locally installed modules. + +{{id amd}} +## Slow-loading modules + +{{index loading, "synchronous I/O", blocking, "World Wide Web"}} + +Though it is possible to use the CommonJS module style when +writing JavaScript for the ((browser)), it is somewhat involved. The +reason for this is that reading a file (module) from the Web is a lot +slower than reading it from the hard disk. While a script is running +in the browser, nothing else can happen to the website on which it +runs, for reasons that will become clear in +[Chapter 14](14_event.html#timeline). This means that if every +`require` call went and fetched something from some faraway web +server, the page would freeze for a painfully long time while loading +its scripts. + +{{index Browserify, "require function", preprocessing}} + +One way to +work around this problem is to run a program like +[_Browserify_](http://browserify.org) on your code before you serve it +on a web page. This will look for calls to `require`, resolve all +dependencies, and gather the needed code into a single big file. +The website itself can simply load this file to get all the modules +it needs. + +{{index AMD, dependency, "asynchronous I/O"}} + +Another solution is to wrap the +code that makes up your module in a function so that the ((module +loader)) can first load its dependencies in the background and then +call the function, initializing the ((module)), when the dependencies +have been loaded. That is what the Asynchronous Module Definition +(AMD) module system does. 
+ +{{index "weekday example"}} + +Our trivial program with dependencies would look +like this in AMD: + +{{test no}} + +``` +define(["weekDay", "today"], function(weekDay, today) { + console.log(weekDay.name(today.dayNumber())); +}); +``` + +{{index "define function", "asynchronous programming"}} + +The `define` +function is central to this approach. It takes first an array of +module names and then a function that takes one argument for each +dependency. It will load the dependencies (if they haven't already +been loaded) in the background, allowing the page to continue working +while the files are being fetched. Once all dependencies are loaded, +`define` will call the function it was given, with the ((interface))s +of those dependencies as arguments. + +{{index "weekday example", "define function"}} + +The modules that are loaded +this way must themselves contain a call to `define`. The value used as +their interface is whatever was returned by the function passed to +`define`. Here is the `weekDay` module again: + +``` +define([], function() { + var names = ["Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"]; + return { + name: function(number) { return names[number]; }, + number: function(name) { return names.indexOf(name); } + }; +}); +``` + +{{index "define function", "backgroundReadFile function"}} + +To be +able to show a minimal implementation of `define`, we will pretend we +have a `backgroundReadFile` function that takes a filename and a +function and calls the function with the content of the file as +soon as it has finished loading it. ([Chapter +17](17_http.html#getURL) will explain how to write that function.) + +For the purpose of keeping track of modules while they are being +loaded, the implementation of `define` will use objects that describe +the state of modules, telling us whether they are available yet and +providing their interface when they are. 
+ +The `getModule` function, when given a name, will return such an +object and ensure that the module is scheduled to be loaded. It uses +a ((cache)) object to avoid loading the same module twice. + +// include_code + +``` +var defineCache = Object.create(null); +var currentMod = null; + +function getModule(name) { + if (name in defineCache) + return defineCache[name]; + + var module = {exports: null, + loaded: false, + onLoad: []}; + defineCache[name] = module; + backgroundReadFile(name, function(code) { + currentMod = module; + new Function("", code)(); + }); + return module; +} +``` + +{{index "define function"}} + +We assume the loaded file also contains a +(single) call to `define`. The `currentMod` variable is used to tell +this call about the module object that is currently being loaded so +that it can update this object when it finishes loading. We will come +back to this mechanism in a moment. + +{{index dependency, "Function constructor", "asynchronous programming", "event handling"}} + +The `define` function itself uses +`getModule` to fetch or create the module objects for the current +module's dependencies. Its task is to schedule the `moduleFunction` +(the function that contains the module's actual code) to be run +whenever those dependencies are loaded. For this purpose, it defines a +function `whenDepsLoaded` that is added to the `onLoad` array of all +dependencies that are not yet loaded. This function immediately +returns if there are still unloaded dependencies, so it will do +actual work only once, when the last dependency has finished loading. It is +also called immediately, from `define` itself, in case there are no +dependencies that need to be loaded. 
+ +// include_code + +``` +function define(depNames, moduleFunction) { + var myMod = currentMod; + var deps = depNames.map(getModule); + + deps.forEach(function(mod) { + if (!mod.loaded) + mod.onLoad.push(whenDepsLoaded); + }); + + function whenDepsLoaded() { + if (!deps.every(function(m) { return m.loaded; })) + return; + + var args = deps.map(function(m) { return m.exports; }); + var exports = moduleFunction.apply(null, args); + if (myMod) { + myMod.exports = exports; + myMod.loaded = true; + myMod.onLoad.forEach(function(f) { f(); }); + } + } + whenDepsLoaded(); +} +``` + +{{index "define function"}} + +When all dependencies are available, +`whenDepsLoaded` calls the function that holds the module, giving it +the dependencies’ interfaces as arguments. + +The first thing `define` does is store the value that `currentMod` had +when it was called in a variable `myMod`. Remember that `getModule`, +just before evaluating the code for a module, stored the corresponding +module object in `currentMod`. This allows `whenDepsLoaded` to store +the return value of the module function in that module's `exports` +property, set the module's `loaded` property to true, and call all the +functions that are waiting for the module to load. + +{{index "asynchronous programming"}} + +This code is a lot harder to follow than +the `require` function. Its execution does not follow a simple, +predictable path. Instead, multiple operations are set up to happen at +some unspecified time in the ((future)), which obscures the way the +code executes. + +A real ((AMD)) implementation is, again, quite a lot more clever about +resolving module names to actual URLs and generally more robust than +the one shown previously. The _((RequireJS))_ ([_requirejs.org_](http://requirejs.org)) project provides +a popular implementation of this style of ((module loader)). 
+ +## Interface design + +{{index [interface, design]}} + +Designing interfaces for modules and object +types is one of the subtler aspects of programming. Any nontrivial +piece of functionality can be modeled in various ways. Finding a way that +works well requires insight and foresight. + +The best way to learn the value of good interface design is to use +lots of interfaces—some good, some bad. Experience will teach +you what works and what doesn't. Never assume that a painful interface +is “just the way it is”. Fix it, or wrap it in a new interface that +works better for you. + +### Predictability + +{{index documentation, predictability, convention}} + +If programmers +can predict the way your interface works, they (or you) won't get +sidetracked as often by the need to look up how to use it. Thus, try +to follow conventions. When there is another module or part of the +standard JavaScript environment that does something similar to what +you are implementing, it might be a good idea to make your interface +resemble the existing interface. That way, it'll feel familiar to +people who know the existing interface. + +{{index cleverness}} + +Another area where predictability is important is the +actual _behavior_ of your code. It can be tempting to make an +unnecessarily clever interface with the justification that it's more +convenient to use. For example, you could accept all kinds of +different types and combinations of arguments and do the “right +thing” for all of them. Or you could provide dozens of specialized +convenience functions that provide slightly different flavors of your +module's functionality. These might make code that builds on your +interface slightly shorter, but they will also make it much harder for +people to build a clear ((mental model)) of the module's behavior. + +### Composability + +{{index composability}} + +In your interfaces, try to use the simplest ((data +structure))s possible and make functions do a single, clear thing. 
+Whenever practical, make them ((pure function))s (see +[Chapter 3](03_functions.html#pure)). + +{{index "array-like object"}} + +For example, it is not uncommon for modules to +provide their own array-like collection objects, with their own +interface for counting and extracting elements. Such objects won't +have `map` or `forEach` methods, and any existing function that +expects a real array won't be able to work with them. This is an +example of poor _composability_—the module cannot be easily composed +with other code. + +{{index encapsulation, "spell-check example"}} + +One example would be a +module for spell-checking text, which we might need when we want to +write a text editor. The spell-checker could be made to operate +directly on whichever complicated ((data structure))s the editor uses +and directly call internal functions in the editor to have the user +choose between spelling suggestions. If we go that way, the module +cannot be used with any other programs. On the other hand, if we +define the spell-checking interface so that you can pass it a simple +string and it will return the position in the string where it found a +possible misspelling, along with an array of suggested corrections, +then we have an interface that could also be composed with other +systems because strings and arrays are always available in +JavaScript. + +### Layered interfaces + +{{index simplicity, complexity, layering, "interface design"}} + +When designing an interface for a complex piece of +functionality—sending email, for example—you often run into a dilemma. +On the one hand, you do not want to overload the user of your +interface with details. They shouldn't have to study your interface +for 20 minutes before they can send an email. On the other hand, you +do not want to hide all the details either—when people need to do +complicated things with your module, they should be able to. 
+ +Often the solution is to provide two interfaces: a detailed +_low-level_ one for complex situations and a simple _high-level_ one +for routine use. The second can usually be built easily using the +tools provided by the first. In the email module, the high-level +interface could just be a function that takes a message, a sender +address, and a receiver address and then sends the email. The low-level +interface would allow full control over email headers, attachments, +HTML mail, and so on. + +## Summary + +Modules provide structure to bigger programs by separating the code +into different files and namespaces. Giving these modules well-defined +interfaces makes them easier to use and reuse +and makes it possible to continue using them as the module +itself evolves. + +Though the JavaScript language is characteristically unhelpful +when it comes to modules, the flexible functions and objects it +provides make it possible to define rather nice module systems. +Function scopes can be used as internal namespaces for the module, and +objects can be used to store sets of exported values. + +There are two popular, well-defined approaches to such modules. One is +called _CommonJS Modules_ and revolves around a `require` function +that fetches a module by name and returns its interface. The other is +called _AMD_ and uses a `define` function that takes an array of +module names and a function and, after loading the modules, runs the +function with their interfaces as arguments. + +## Exercises + +### Month names + +{{index "Date type", "weekday example", "month name (exercise)"}} + +Write a +simple module similar to the `weekDay` module that can convert month +numbers (zero-based, as in the `Date` type) to names and can convert names back +to numbers. Give it its own namespace since it will need an internal +array of month names, and use plain JavaScript, without any module +loader system. + +{{if interactive + +{{test no}} + +``` +// Your code here. 
+ +console.log(month.name(2)); +// → March +console.log(month.number("November")); +// → 10 +``` +if}} + +{{hint + +{{index "month name (exercise)"}} + +This follows the `weekDay` module almost +exactly. A function expression, called immediately, wraps the variable +that holds the array of names, along with the two functions that must +be exported. The functions are put in an object and returned. The +returned interface object is stored in the `month` variable. + +hint}} + +### A return to electronic life + +{{index "electronic life", module}} + +Hoping that +[Chapter 7](07_elife.html#elife) is still somewhat fresh in your +mind, think back to the system designed in that chapter and come up +with a way to separate the code into modules. To refresh your memory, +these are the functions and types defined in that chapter, in order of +appearance: + +```null +Vector +Grid +directions +directionNames +randomElement +BouncingCritter +elementFromChar +World +charFromElement +Wall +View +WallFollower +dirPlus +LifelikeWorld +Plant +PlantEater +SmartPlantEater +Tiger +``` + +{{index "book analogy"}} + +Don't exaggerate and create too many modules. A book +that starts a new chapter for every page would probably get on your +nerves, if only because of all the space wasted on titles. Similarly, +having to open 10 files to read a tiny project isn't helpful. Aim for +three to five modules. + +{{index encapsulation}} + +You can choose to have some functions become +internal to their module and thus inaccessible to other modules. + +There is no single correct solution here. Module organization is +largely a matter of ((taste)). + +{{hint + +Here is what I came up with. I've put parentheses around internal +functions. 
+ +```null +Module "grid" + Vector + Grid + directions + directionNames + +Module "world" + (randomElement) + (elementFromChar) + (charFromElement) + View + World + LifelikeWorld + directions [reexported] + +Module "simple_ecosystem" + (randomElement) [duplicated] + (dirPlus) + Wall + BouncingCritter + WallFollower + +Module "ecosystem" + Wall [duplicated] + Plant + PlantEater + SmartPlantEater + Tiger +``` + +{{index exporting}} + +I have reexported the `directions` array from the +`grid` module from `world` so that modules built on that (the +ecosystems) don't have to know or worry about the existence of the +`grid` module. + +{{index duplication}} + +I also duplicated two generic and tiny helper values +(`randomElement` and `Wall`) since they are used as internal details +in different contexts and do not belong in the interfaces for these +modules. + +hint}} + +### Circular dependencies + +{{index dependency, "circular dependency", "require function"}} + +A +tricky subject in dependency management is circular dependencies, +where module A depends on B, and B also depends on A. Many module +systems simply forbid this. ((CommonJS)) modules allow a limited form: +it works as long as the modules do not replace their default `exports` +object with another value and start accessing each other's +interface only after they finish loading. + +Can you think of a way in which support for this feature could be +implemented? Look back to the definition of `require` and consider +what the function would have to do to allow this. + +{{hint + +{{index overriding, "circular dependency", "exports object"}} + +The trick +is to add the `exports` object created for a module to `require`'s +((cache)) _before_ actually running the module. This means the module +will not yet have had a chance to override `module.exports`, so we do +not know whether it may want to export some other value. 
After +loading, the cache object is overridden with `module.exports`, which +may be a different value. + +But if in the course of loading the module, a second module is loaded +that asks for the first module, its default `exports` object, which is likely +still empty at this point, will be in the cache, and the second module +will receive a reference to it. If it doesn't try to do anything with +the object until the first module has finished loading, things will +work. + +hint}} + diff --git a/11_language.md b/11_language.md new file mode 100644 index 000000000..1cb1bcb79 --- /dev/null +++ b/11_language.md @@ -0,0 +1,968 @@ +{{meta {chap_num: 11, prev_link: 10_modules, next_link: 12_browser, load_files: ["code/chapter/11_language.js"], zip: "node/html"}}} + +# Project: A Programming Language + +{{quote {author: "Hal Abelson and Gerald Sussman", title: "Structure and Interpretation of Computer Programs", chapter: true} + +The evaluator, which determines the meaning of expressions in a +programming language, is just another program. + +quote}} + +{{if interactive + +{{quote {author: "Master Yuan-Ma", title: "The Book of Programming", chapter: true} + +When a student asked the master about the nature of the cycle of Data +and Control, Yuan-Ma replied ‘Think of a compiler, compiling itself.’ + +quote}} + +if}} + +{{index "Abelson, Hal", "Sussman, Gerald", SICP, "project chapter"}} + +Building your own +((programming language)) is surprisingly easy (as long as you do not +aim too high) and very enlightening. + +The main thing I want to show in this chapter is that there is no +((magic)) involved in building your own language. I've often felt that +some human inventions were so immensely clever and complicated that +I'd never be able to understand them. But with a little reading and +tinkering, such things often turn out to be quite mundane. + +{{index "Egg language"}} + +We will build a programming language called Egg. 
It +will be a tiny, simple language but one that is powerful enough to +express any computation you can think of. It will also allow simple +((abstraction)) based on ((function))s. + +{{id parsing}} +## Parsing + +{{index parsing, validation}} + +The most immediately visible part of a +programming language is its _((syntax))_, or notation. A _parser_ is a +program that reads a piece of text and produces a data structure that +reflects the structure of the program contained in that text. If the +text does not form a valid program, the parser should complain and +point out the error. + +{{index "special form"}} + +Our language will have a simple and uniform +syntax. Everything in Egg is an ((expression)). An expression can be a +variable, a number, a string, or an _application_. Applications are +used for function calls but also for constructs such as `if` or `while`. + +{{index "double-quote character", parsing, [escaping, "in strings"]}} + +To +keep the parser simple, strings in Egg do not support anything like +backslash escapes. A string is simply a sequence of characters that +are not double quotes, wrapped in double quotes. A number is a +sequence of digits. Variable names can consist of any character that +is not ((whitespace)) and does not have a special meaning in the +syntax. + +{{index "comma character"}} + +Applications are written the way they are in +JavaScript, by putting ((parentheses)) after an expression and having +any number of ((argument))s between those parentheses, separated by +commas. + +```null +do(define(x, 10), + if(>(x, 5), + print("large"), + print("small"))) +``` + +{{index block}} + +The ((uniformity)) of the ((Egg language)) means that +things that are ((operator))s in JavaScript (such as `>`) are normal +variables in this language, applied just like other ((function))s. And +since the ((syntax)) has no concept of a block, we need a `do` +construct to represent doing multiple things in sequence. 
+ +{{index "type property", parsing}} + +The ((data structure)) that the parser will +use to describe a program will consist of ((expression)) objects, each +of which has a `type` property indicating the kind of expression it is +and other properties to describe its content. + +{{index identifier}} + +Expressions of type `"value"` represent literal strings +or numbers. Their `value` property contains the string or number value +that they represent. Expressions of type `"word"` are used for +identifiers (names). Such objects have a `name` property that holds +the identifier's name as a string. Finally, `"apply"` expressions +represent applications. They have an `operator` property that refers +to the expression that is being applied, and they have an `args` property that +refers to an array of argument expressions. + +The `>(x, 5)` part of the previous program would be represented like this: + +```application/json +{ + type: "apply", + operator: {type: "word", name: ">"}, + args: [ + {type: "word", name: "x"}, + {type: "value", value: 5} + ] +} +``` + +{{indexsee "abstract syntax tree", "syntax tree"}} + +Such a ((data structure)) is called a _((syntax tree))_. If you +imagine the objects as dots and the links between them as lines +between those dots, it has a ((tree))like shape. The fact that +expressions contain other expressions, which in turn might contain +more expressions, is similar to the way branches split and split again. + +{{figure {url: "img/syntax_tree.svg", alt: "The structure of a syntax tree",width: "5cm"}}} + +{{index parsing}} + +Contrast this to the parser we wrote for the +configuration file format in [Chapter 9](09_regexp.html#ini), which +had a simple structure: it split the input into lines and +handled those lines one at a time. There were only a few simple forms +that a line was allowed to have. + +{{index recursion, [nesting, "of expressions"]}} + +Here we must find a +different approach. 
Expressions are not separated into lines, and they +have a recursive structure. Application expressions _contain_ other +expressions. + +{{index elegance}} + +Fortunately, this problem can be solved elegantly by +writing a parser function that is recursive in a way that reflects the +recursive nature of the language. + +{{index "parseExpression function", "syntax tree"}} + +We define a function +`parseExpression`, which takes a string as input and returns an +object containing the data structure for the expression at the start +of the string, along with the part of the string left after parsing +this expression. When parsing subexpressions (the argument to an +application, for example), this function can be called again, yielding +the argument expression as well as the text that remains. This text +may in turn contain more arguments or may be the closing parenthesis +that ends the list of arguments. + +This is the first part of the parser: + +// include_code + +``` +function parseExpression(program) { + program = skipSpace(program); + var match, expr; + if (match = /^"([^"]*)"/.exec(program)) + expr = {type: "value", value: match[1]}; + else if (match = /^\d+\b/.exec(program)) + expr = {type: "value", value: Number(match[0])}; + else if (match = /^[^\s(),"]+/.exec(program)) + expr = {type: "word", name: match[0]}; + else + throw new SyntaxError("Unexpected syntax: " + program); + + return parseApply(expr, program.slice(match[0].length)); +} + +function skipSpace(string) { + var first = string.search(/\S/); + if (first == -1) return ""; + return string.slice(first); +} +``` + +{{index "skipSpace function"}} + +Because Egg allows any amount of +((whitespace)) between its elements, we have to repeatedly cut the +whitespace off the start of the program string. This is what the +`skipSpace` function helps with. 
+ +{{index "literal expression", "SyntaxError type"}} + +After skipping any +leading space, `parseExpression` uses three ((regular expression))s to +spot the three simple (atomic) elements that Egg supports: strings, +numbers, and words. The parser constructs a different kind of data +structure depending on which one matches. If the input does not match +one of these three forms, it is +not a valid expression, and the parser throws an error. `SyntaxError` is a +standard error object type, which is raised when an attempt is made to +run an invalid JavaScript program. + +{{index "parseApply function"}} + +We can then cut off the part that we matched +from the program string and pass that, along with the object for the +expression, to `parseApply`, which checks whether the expression is an +application. If so, it parses a parenthesized list of arguments. + +// include_code + +``` +function parseApply(expr, program) { + program = skipSpace(program); + if (program[0] != "(") + return {expr: expr, rest: program}; + + program = skipSpace(program.slice(1)); + expr = {type: "apply", operator: expr, args: []}; + while (program[0] != ")") { + var arg = parseExpression(program); + expr.args.push(arg.expr); + program = skipSpace(arg.rest); + if (program[0] == ",") + program = skipSpace(program.slice(1)); + else if (program[0] != ")") + throw new SyntaxError("Expected ',' or ')'"); + } + return parseApply(expr, program.slice(1)); +} +``` + +{{index parsing}} + +If the next character in the program is not an opening +parenthesis, this is not an application, and `parseApply` simply +returns the expression it was given. + +{{index recursion}} + +Otherwise, it skips the opening parenthesis and +creates the ((syntax tree)) object for this application expression. It +then recursively calls `parseExpression` to parse each argument until a +closing parenthesis is found. The recursion is indirect, through +`parseApply` and `parseExpression` calling each other. 
+ +Because an application expression can itself be applied (such as in +`multiplier(2)(1)`), `parseApply` must, after it has parsed an +application, call itself again to check whether another pair of +parentheses follows. + +{{index "syntax tree", "Egg language", "parse function"}} + +This is all we +need to parse Egg. We wrap it in a convenient `parse` function that +verifies that it has reached the end of the input string after parsing +the expression (an Egg program is a single expression), and that +gives us the program's data structure. + +{{includeCode "strip_log"}} +{{test join}} + +``` +function parse(program) { + var result = parseExpression(program); + if (skipSpace(result.rest).length > 0) + throw new SyntaxError("Unexpected text after program"); + return result.expr; +} + +console.log(parse("+(a, 10)")); +// → {type: "apply", +// operator: {type: "word", name: "+"}, +// args: [{type: "word", name: "a"}, +// {type: "value", value: 10}]} +``` + +{{index "error message"}} + +It works! It doesn't give us very helpful +information when it fails and doesn't store the line and column on +which each expression starts, which might be helpful when reporting +errors later, but it's good enough for our purposes. + +## The evaluator + +{{index "evaluate function", evaluation, interpretation, "syntax tree", "Egg language"}} + +What can we do with the syntax tree for a +program? Run it, of course! And that is what the evaluator does. You +give it a syntax tree and an environment object that associates names +with values, and it will evaluate the expression that the tree +represents and return the value that this produces. 
+ +// include_code + +``` +function evaluate(expr, env) { + switch(expr.type) { + case "value": + return expr.value; + + case "word": + if (expr.name in env) + return env[expr.name]; + else + throw new ReferenceError("Undefined variable: " + + expr.name); + case "apply": + if (expr.operator.type == "word" && + expr.operator.name in specialForms) + return specialForms[expr.operator.name](expr.args, + env); + var op = evaluate(expr.operator, env); + if (typeof op != "function") + throw new TypeError("Applying a non-function."); + return op.apply(null, expr.args.map(function(arg) { + return evaluate(arg, env); + })); + } +} + +var specialForms = Object.create(null); +``` + +{{index "literal expression", environment}} + +The evaluator has code for +each of the ((expression)) types. A literal value expression simply +produces its value. (For example, the expression `100` just evaluates +to the number 100.) For a variable, we must check whether it is +actually defined in the environment and, if it is, fetch the +variable's value. + +{{index [function, application]}} + +Applications are more involved. If they are +a ((special form)), like `if`, we do not evaluate anything and simply +pass the argument expressions, along with the environment, to the +function that handles this form. If it is a normal call, we evaluate +the operator, verify that it is a function, and call it with the +result of evaluating the arguments. + +We will use plain JavaScript function values to represent Egg's +function values. We will come back to this +[later](11_language.html#egg_fun), when the special form called +`fun` is defined. + +{{index readability, "evaluate function", recursion, parsing}} + +The recursive structure of +`evaluate` resembles the similar structure of the parser. Both mirror +the structure of the language itself. It would also be possible to +integrate the parser with the evaluator and evaluate during parsing, +but splitting them up this way makes the program more readable. 
+ +{{index "Egg language", interpretation}} + +This is really all that is +needed to interpret Egg. It is that simple. But without defining a few +special forms and adding some useful values to the ((environment)), +you can't do anything with this language yet. + +## Special forms + +{{index "special form", "specialForms object"}} + +The `specialForms` object +is used to define special syntax in Egg. It associates words with +functions that evaluate such special forms. It is currently empty. +Let's add some forms. + +// include_code + +``` +specialForms["if"] = function(args, env) { + if (args.length != 3) + throw new SyntaxError("Bad number of args to if"); + + if (evaluate(args[0], env) !== false) + return evaluate(args[1], env); + else + return evaluate(args[2], env); +}; +``` + +{{index "conditional execution"}} + +Egg's `if` construct expects exactly three +arguments. It will evaluate the first, and if the result isn't the +value `false`, it will evaluate the second. Otherwise, the third gets +evaluated. This `if` form is more similar to JavaScript's ternary `?:` +operator than to JavaScript's `if`. It is an expression, not a statement, +and it produces a value, namely, the result of the second or third +argument. + +{{index Boolean}} + +Egg differs from JavaScript in how it handles the +condition value to `if`. It will not treat things like zero or the +empty string as false, but only the precise value `false`. + +{{index "short-circuit evaluation"}} + +The reason we need to represent `if` as +a special form, rather than a regular function, is that all arguments +to functions are evaluated before the function is called, whereas +`if` should evaluate only _either_ its second or its third argument, +depending on the value of the first. + +The `while` form is similar. 
+ +// include_code + +``` +specialForms["while"] = function(args, env) { + if (args.length != 2) + throw new SyntaxError("Bad number of args to while"); + + while (evaluate(args[0], env) !== false) + evaluate(args[1], env); + + // Since undefined does not exist in Egg, we return false, + // for lack of a meaningful result. + return false; +}; +``` + +Another basic building block is `do`, which executes all its arguments +from top to bottom. Its value is the value produced by the last +argument. + +// include_code + +``` +specialForms["do"] = function(args, env) { + var value = false; + args.forEach(function(arg) { + value = evaluate(arg, env); + }); + return value; +}; +``` + +{{index "= operator"}} + +To be able to create ((variable))s and give them new +values, we also create a form called `define`. It expects a word as +its first argument and an expression producing the value to assign to +that word as its second argument. Since `define`, like everything, is +an expression, it must return a value. We'll make it return the value +that was assigned (just like JavaScript's `=` operator). + +// include_code + +``` +specialForms["define"] = function(args, env) { + if (args.length != 2 || args[0].type != "word") + throw new SyntaxError("Bad use of define"); + var value = evaluate(args[1], env); + env[args[0].name] = value; + return value; +}; +``` + +## The environment + +{{index "Egg language", "evaluate function"}} + +The ((environment)) accepted +by `evaluate` is an object with properties whose names correspond to +variable names and whose values correspond to the values those +((variable))s are bound to. Let's define an environment object to +represent the ((global scope)). + +To be able to use the `if` construct we just defined, we must +have access to ((Boolean)) values. Since there are only two +Boolean values, we do not need special syntax for them. We simply bind +two variables to the values `true` and `false` and use those. 
+ +// include_code + +``` +var topEnv = Object.create(null); + +topEnv["true"] = true; +topEnv["false"] = false; +``` + +We can now evaluate a simple expression that negates a Boolean value. + +``` +var prog = parse("if(true, false, true)"); +console.log(evaluate(prog, topEnv)); +// → false +``` + +{{index arithmetic, "Function constructor"}} + +To supply basic +((arithmetic)) and ((comparison)) ((operator))s, we will also add some +function values to the ((environment)). In the interest of keeping the +code short, we'll use `new Function` to synthesize a bunch of operator +functions in a loop, rather than defining them all individually. + +// include_code + +``` +["+", "-", "*", "/", "==", "<", ">"].forEach(function(op) { + topEnv[op] = new Function("a, b", "return a " + op + " b;"); +}); +``` + +A way to ((output)) values is also very useful, so we'll wrap +`console.log` in a function and call it `print`. + +// include_code + +``` +topEnv["print"] = function(value) { + console.log(value); + return value; +}; +``` + +{{index parsing, "run function"}} + +That gives us enough elementary tools +to write simple programs. The following `run` function provides a +convenient way to write and run them. It creates a fresh environment +and parses and evaluates the strings we give it as a single program. + +// include_code + +``` +function run() { + var env = Object.create(topEnv); + var program = Array.prototype.slice + .call(arguments, 0).join("\n"); + return evaluate(parse(program), env); +} +``` + +{{index "join method", "call method"}} + +The use of +`Array.prototype.slice.call` is a trick to turn an ((array-like +object)), such as `arguments`, into a real array so that we can call +`join` on it. It takes all the arguments given to `run` and treats +them as the lines of a program. 
+ +``` +run("do(define(total, 0),", + " define(count, 1),", + " while(<(count, 11),", + " do(define(total, +(total, count)),", + " define(count, +(count, 1)))),", + " print(total))"); +// → 55 +``` + +{{index "summing example", "Egg language"}} + +This is the program we've seen +several times before, which computes the sum of the numbers 1 to 10, +expressed in Egg. It is clearly uglier than the equivalent JavaScript +program but not bad for a language implemented in less than 150 +((lines of code)). + +{{id egg_fun}} +## Functions + +{{index function, "Egg language"}} + +A programming language without +functions is a poor programming language indeed. + +Fortunately, it is not hard to add a `fun` construct, which treats its +last argument as the function's body and treats all the arguments before that as +the names of the function's arguments. + +// include_code + +``` +specialForms["fun"] = function(args, env) { + if (!args.length) + throw new SyntaxError("Functions need a body"); + function name(expr) { + if (expr.type != "word") + throw new SyntaxError("Arg names must be words"); + return expr.name; + } + var argNames = args.slice(0, args.length - 1).map(name); + var body = args[args.length - 1]; + + return function() { + if (arguments.length != argNames.length) + throw new TypeError("Wrong number of arguments"); + var localEnv = Object.create(env); + for (var i = 0; i < arguments.length; i++) + localEnv[argNames[i]] = arguments[i]; + return evaluate(body, localEnv); + }; +}; +``` + +{{index "local scope", "Object.create function", prototype}} + +Functions +in Egg have their own local environment, just like in JavaScript. We +use `Object.create` to make a new object that has access to the +variables in the outer environment (its prototype) but that can also +contain new variables without modifying that outer scope. 
+ +{{index "power example", evaluation, interpretation}} + +The function +created by the `fun` form creates this local environment and adds the +argument variables to it. It then evaluates the function body in this +environment and returns the result. + +{{startCode}} + +``` +run("do(define(plusOne, fun(a, +(a, 1))),", + " print(plusOne(10)))"); +// → 11 + +run("do(define(pow, fun(base, exp,", + " if(==(exp, 0),", + " 1,", + " *(base, pow(base, -(exp, 1)))))),", + " print(pow(2, 10)))"); +// → 1024 +``` + +## Compilation + +{{index interpretation, compilation}} + +What we have built is an +interpreter. During evaluation, it acts directly on the representation +of the program produced by the parser. + +{{index efficiency, performance}} + +_Compilation_ is the process of +adding another step between the parsing and the running of a program, +which transforms the program into something that can be evaluated more +efficiently by doing as much work as possible in advance. For example, +in well-designed languages it is obvious, for each use of a +((variable)), which variable is being referred to, without actually +running the program. This can be used to avoid looking up the variable +by name every time it is accessed and to directly fetch it from some +predetermined ((memory)) location. + +Traditionally, ((compilation)) involves converting the program to +((machine code)), the raw format that a computer's processor can +execute. But any process that converts a program to a different +representation can be thought of as compilation. + +{{index simplicity, "Function constructor", transpilation}} + +It would +be possible to write an alternative ((evaluation)) strategy for Egg, +one that first converts the program to a JavaScript program, uses `new +Function` to invoke the JavaScript compiler on it, and then runs the +result. When done right, this would make Egg run very fast while +still being quite simple to implement. 
+ +If you are interested in this topic and willing to spend some time on +it, I encourage you to try to implement such a compiler as an +exercise. + +## Cheating + +{{index "Egg language"}} + +When we defined `if` and `while`, you probably +noticed that they were more or less trivial wrappers around +JavaScript's own `if` and `while`. Similarly, the values in Egg are +just regular old JavaScript values. + +If you compare the implementation of Egg, built on top of JavaScript, +with the amount of work and complexity required to build a programming +language directly on the raw functionality provided by a machine, the +difference is huge. Regardless, this example hopefully gave you an +impression of the way ((programming language))s work. + +And when it comes to getting something done, cheating is more +effective than doing everything yourself. Though the toy language in +this chapter doesn't do anything that couldn't be done better in +JavaScript, there _are_ situations where writing small languages helps +get real work done. + +Such a language does not have to resemble a typical programming +language. If JavaScript didn't come equipped with regular expressions, +you could write your own parser and evaluator for such a sublanguage. + +{{index "artificial intelligence"}} + +Or imagine you are building a giant +robotic ((dinosaur)) and need to program its ((behavior)). JavaScript +might not be the most effective way to do this. You might instead opt +for a language that looks like this: + +```null +behavior walk + perform when + destination ahead + actions + move left-foot + move right-foot + +behavior attack + perform when + Godzilla in-view + actions + fire laser-eyes + launch arm-rockets +``` + +{{index expressivity}} + +This is what is usually called a _((domain-specific +language))_, a language tailored to express a narrow domain of +knowledge. 
Such a language can be more expressive than a +general-purpose language because it is designed to express exactly the +things that need expressing in its domain and nothing else. + +## Exercises + +### Arrays + +{{index "Egg language"}} + +Add support for ((array))s to Egg by adding the +following three functions to the top scope: `array(...)` to +construct an array containing the argument values, `length(array)` to +get an array's length, and `element(array, n)` to fetch the n^th^ +element from an array. + +{{if interactive + +{{test no}} + +``` +// Modify these definitions... + +topEnv["array"] = "..."; + +topEnv["length"] = "..."; + +topEnv["element"] = "..."; + +run("do(define(sum, fun(array,", + " do(define(i, 0),", + " define(sum, 0),", + " while(<(i, length(array)),", + " do(define(sum, +(sum, element(array, i))),", + " define(i, +(i, 1)))),", + " sum))),", + " print(sum(array(1, 2, 3))))"); +// → 6 +``` +if}} + +{{hint + +The easiest way to do this is to represent Egg arrays +with JavaScript arrays. + +{{index "slice method"}} + +The values added to the top environment must be +functions. `Array.prototype.slice` can be used to convert an +`arguments` array-like object into a regular array. + +hint}} + +### Closure + +{{index closure, [function, scope]}} + +The way we have defined `fun` allows +functions in Egg to “close over” the surrounding environment, allowing +the function's body to use local values that were visible at the time +the function was defined, just like JavaScript functions do. + +The following program illustrates this: function `f` returns a function +that adds its argument to `f`'s argument, meaning that it needs access +to the local ((scope)) inside `f` to be able to use variable `a`. + +``` +run("do(define(f, fun(a, fun(b, +(a, b)))),", + " print(f(4)(5)))"); +// → 9 +``` + +Go back to the definition of the `fun` form and explain which +mechanism causes this to work. 
+ +{{hint + +{{index closure}} + +Again, we are riding along on a JavaScript mechanism to +get the equivalent feature in Egg. Special forms are passed the local +environment in which they are evaluated so that they can evaluate +their subforms in that environment. The function returned by `fun` +closes over the `env` argument given to its enclosing function and +uses that to create the function's local ((environment)) when it is +called. + +{{index compilation}} + +This means that the ((prototype)) of the local +environment will be the environment in which the function was created, +which makes it possible to access variables in that environment from +the function. This is all there is to implementing closure (though to +compile it in a way that is actually efficient, you'd need to do some +more work). + +hint}} + +### Comments + +{{index "hash character", "Egg language"}} + +It would be nice if we could +write ((comment))s in Egg. For example, whenever we find a hash sign +(`#`), we could treat the rest of the line as a comment and ignore it, +similar to `//` in JavaScript. + +{{index "skipSpace function"}} + +We do not have to make any big changes to the +parser to support this. We can simply change `skipSpace` to skip +comments like they are ((whitespace)) so that all the points where +`skipSpace` is called will now also skip comments. Make this change. + +{{if interactive + +{{test no}} + +``` +// This is the old skipSpace. Modify it... +function skipSpace(string) { + var first = string.search(/\S/); + if (first == -1) return ""; + return string.slice(first); +} + +console.log(parse("# hello\nx")); +// → {type: "word", name: "x"} + +console.log(parse("a # one\n # two\n()")); +// → {type: "apply", +// operator: {type: "word", name: "a"}, +// args: []} +``` +if}} + +{{hint + +{{index comment}} + +Make sure your solution handles multiple comments in a +row, with potentially ((whitespace)) between or after them. 
+ +A ((regular expression)) is probably the easiest way to solve this. +Write something that matches “whitespace or a comment, zero or more +times”. Use the `exec` or `match` method and look at the length of +the first element in the returned array (the whole match) to find out +how many characters to slice off. + +hint}} + +### Fixing scope + +{{index [variable, definition], assignment}} + +Currently, the only way to +assign a ((variable)) a value is `define`. This construct acts as +a way both to define new variables and to give existing ones a new value. + +{{index "local variable"}} + +This ((ambiguity)) causes a problem. When you try +to give a nonlocal variable a new value, you will end up defining a +local one with the same name instead. (Some languages work like this +by design, but I've always found it a silly way to handle ((scope)).) + +{{index "ReferenceError type"}} + +Add a special form `set`, similar to +`define`, which gives a variable a new value, updating the variable in +an outer scope if it doesn't already exist in the inner scope. If the +variable is not defined at all, throw a `ReferenceError` (which is +another standard error type). + +{{index "hasOwnProperty method", prototype, "getPrototypeOf function"}} + +The technique of representing scopes as simple objects, +which has made things convenient so far, will get in your way a +little at this point. You might want to use the +`Object.getPrototypeOf` function, which returns the prototype of an +object. Also remember that scopes do not derive from +`Object.prototype`, so if you want to call `hasOwnProperty` on them, +you have to use this clumsy expression: + +{{test no}} + +``` +Object.prototype.hasOwnProperty.call(scope, name); +``` + +This fetches the `hasOwnProperty` method from the `Object` prototype +and then calls it on a scope object. + +{{if interactive + +{{test no}} + +``` +specialForms["set"] = function(args, env) { + // Your code here. 
+}; + +run("do(define(x, 4),", + " define(setx, fun(val, set(x, val))),", + " setx(50),", + " print(x))"); +// → 50 +run("set(quux, true)"); +// → Some kind of ReferenceError +``` +if}} + +{{hint + +{{index [variable, definition], assignment, "getPrototypeOf function", "hasOwnProperty method"}} + +You will have to loop through +one ((scope)) at a time, using `Object.getPrototypeOf` to go to the next +outer scope. For each scope, use `hasOwnProperty` to find out whether the +variable, indicated by the `name` property of the first argument to +`set`, exists in that scope. If it does, set it to the result of +evaluating the second argument to `set` and then return that value. + +{{index "global scope", "run-time error"}} + +If the outermost scope is +reached (`Object.getPrototypeOf` returns null) and we haven't found +the variable yet, it doesn't exist, and an error should be thrown. + +hint}} + diff --git a/12_browser.md b/12_browser.md new file mode 100644 index 000000000..303bd08a9 --- /dev/null +++ b/12_browser.md @@ -0,0 +1,455 @@ +{{meta {chap_num: 12, prev_link: 11_language, next_link: 13_dom}}} + +# JavaScript and the Browser + +{{quote {author: "Douglas Crockford", title: "The JavaScript Programming Language (video lecture)", chapter: true} + +The browser is a really hostile programming environment. + +quote}} + +{{index "Crockford, Douglas", [JavaScript, "history of"], "World Wide Web"}} + +The next part of this book will talk about web browsers. Without +web ((browser))s, there would be no JavaScript. And even if there +were, no one would ever have paid any attention to it. + +{{index decentralization, compatibility}} + +Web technology has, from the +start, been decentralized, not just technically but also in the +way it has evolved. Various browser vendors have added new +functionality in ad hoc and sometimes poorly thought out ways, which +then sometimes ended up being adopted by others and finally set down +as a ((standard)). + +This is both a blessing and a curse. 
On the one hand, it is empowering +to not have a central party control a system but have it be improved +by various parties working in loose ((collaboration)) (or, +occasionally, open hostility). On the other hand, the haphazard way in +which the Web was developed means that the resulting system is not +exactly a shining example of internal ((consistency)). In fact, some +parts of it are downright messy and confusing. + +## Networks and the Internet + +Computer ((network))s have been around since the 1950s. If you put +cables between two or more computers and allow them to send data back +and forth through these cables, you can do all kinds of wonderful +things. + +If connecting two machines in the same building allows us to do +wonderful things, connecting machines all over the planet should be +even better. The technology to start implementing this vision was +developed in the 1980s, and the resulting network is called the +_((Internet))_. It has lived up to its promise. + +A computer can use this network to spew bits at another computer. For +any effective ((communication)) to arise out of this bit-spewing, the +computers at both ends must know what the bits are supposed to +represent. The meaning of any given sequence of bits depends entirely +on the kind of thing that it is trying to express and on the +((encoding)) mechanism used. + +A _network ((protocol))_ describes a style of communication over a +((network)). There are protocols for sending email, for fetching email, +for sharing files, or even for controlling computers that happen to be +infected by malicious software. + +For example, a simple ((chat)) protocol might consist of one computer +sending the bits that represent the text “CHAT?” to another machine +and the other responding with “OK!” to confirm that it understands the +protocol. They can then proceed to send each other strings of text, +read the text sent by the other from the network, and display whatever +they receive on their screens. 
+ +{{index layering, stream, ordering}} + +Most protocols are built on +top of other protocols. Our example chat protocol treats the network +as a streamlike device into which you can put bits and have them +arrive at the correct destination in the correct order. Ensuring those +things is already a rather difficult technical problem. + +{{index TCP}} + +{{indexsee "Transmission Control Protocol", TCP}} + +The _Transmission Control Protocol_ (TCP) is a ((protocol)) that +solves this problem. All Internet-connected devices “speak” it, and +most communication on the ((Internet)) is built on top of it. + +{{index "listening (TCP)"}} + +A TCP ((connection)) works as follows: one +computer must be waiting, or _listening_, for other computers to start +talking to it. To be able to listen for different kinds of +communication at the same time on a single machine, each listener has +a number (called a _((port))_) associated with it. Most ((protocol))s +specify which port should be used by default. For example, when we +want to send an email using the ((SMTP)) protocol, the machine through +which we send it is expected to be listening on port 25. + +Another computer can then establish a ((connection)) by connecting to +the target machine using the correct port number. If the target +machine can be reached and is listening on that port, the connection +is successfully created. The listening computer is called the +_((server))_, and the connecting computer is called the _((client))_. + +Such a connection acts as a two-way ((pipe)) through which bits can +flow—the machines on both ends can put data into it. Once the bits are +successfully transmitted, they can be read out again by the machine on +the other side. This is a convenient model. You could say that +((TCP)) provides an ((abstraction)) of the network. 
+ +{{id web}} +## The Web + +The _((World Wide Web))_ (not to be confused with the ((Internet)) as +a whole) is a set of ((protocol))s and formats that allow us to visit +web pages in a browser. The “Web” part in the name refers to the fact +that such pages can easily link to each other, thus connecting into a +huge ((mesh)) that users can move through. + +{{index HTTP}} + +{{indexsee "Hypertext Transfer Protocol", HTTP}} + +To add content to the Web, all you need to do is connect a machine to +the ((Internet)), and have it listen on port 80, using the +_Hypertext Transfer Protocol_ (HTTP). This protocol allows other computers +to request documents over the ((network)). + +{{index URL}} + +{{indexsee "Uniform Resource Locator", URL}} + +Each ((document)) on the Web is named by a _Uniform Resource +Locator_ (URL), which looks something like this: + +```null + http://eloquentjavascript.net/12_browser.html + | | | | + protocol server path +``` + +{{index HTTPS}} + +The first part tells us that this URL uses the HTTP +((protocol)) (as opposed to, for example, encrypted HTTP, which would +be _https://_). Then comes the part that identifies which ((server)) +we are requesting the document from. Last is a path string that +identifies the specific document (or _((resource))_) we are interested +in. + +Each machine connected to the Internet gets a unique _((IP address))_, +which looks something like `37.187.37.82`. You can use these directly +as the server part of a ((URL)). But lists of more or less random +numbers are hard to remember and awkward to type, so you can instead +register a _((domain)) name_ to point toward a specific machine or +set of machines. I registered _eloquentjavascript.net_ to point at the +IP address of a machine I control and can thus use that domain name +to serve web pages. + +{{index browser}} + +If you type the previous URL into your browser's ((address +bar)), it will try to retrieve and display the ((document)) at that +URL.
First, your browser has to find out what address +_eloquentjavascript.net_ refers to. Then, using the ((HTTP)) protocol, +it makes a connection to the server at that address and asks for the +resource _/12_browser.html_. + +We will take a closer look at the HTTP protocol in +[Chapter 17](17_http.html#http). + +## HTML + +{{index HTML}} + +{{indexsee "Hypertext Markup Language", HTML}} + +HTML, which stands for _Hypertext Markup Language_, is the +document format used for web pages. An HTML document contains +((text)), as well as _((tag))s_ that give structure to the text, +describing things such as links, paragraphs, and headings. + +A simple HTML document looks like this: + +```text/html + + + + My home page + + +

    My home page

    +

    Hello, I am Marijn and this is my home page.

    +

    I also wrote a book! Read it + here.

    + + +``` + +{{if book + +This is what such a document would look like in the browser: + +{{figure {url: "img/home-page.png", alt: "My home page",width: "6.3cm"}}} + +if}} + +{{index "angle brackets"}} + +The tags, wrapped in angle brackets (`<` +and `>`), provide information about the ((structure)) of the +document. The other ((text)) is just plain text. + +{{index doctype, version}} + +The document starts with ``, +which tells the browser to interpret it as _modern_ HTML, as opposed +to various dialects that were in use in the past. + +{{index "head (HTML tag)", "body (HTML tag)", "title (HTML tag)", "h1 (HTML tag)", "p (HTML tag)"}} + +HTML documents have a head and a body. +The head contains information _about_ the document, and the body +contains the document itself. In this case, we first declared that the +title of this document is “My home page” and then gave a document +containing a heading (`

    `, meaning “heading 1”—_

    _ to `

    ` +produce more minor headings) and two ((paragraph))s (`

    `). + +{{index "href attribute", "a (HTML tag)"}} + +Tags come in several forms. An +((element)), such as the body, a paragraph, or a link, is started by +an _((opening tag))_ like `

    ` and ended by a _((closing tag))_ like +`

    `. Some opening tags, such as the one for the ((link)) (`<a>`), +contain extra information in the form of `name="value"` pairs. These +are called _((attribute))s_. In this case, the destination of the link +is indicated with `href="http://eloquentjavascript.net"`, where `href` +stands for “hypertext reference”. + +{{index "src attribute", "self-closing tag", "img (HTML tag)"}} + +Some +kinds of ((tag))s do not enclose anything and thus do not need to be +closed. An example of this would be `<img src="http://example.com/image.jpg">`, which will display the ((image)) +found at the given source URL. + +{{index [escaping, "in HTML"]}} + +To be able to include ((angle brackets)) in +the text of a document, even though they have a special meaning in +HTML, yet another form of special notation has to be introduced. A +plain opening angle bracket is written as `&lt;` (“less than”), and +a closing bracket is written as `&gt;` (“greater than”). In HTML, an ampersand +(`&`) character followed by a word and a semicolon (`;`) is called an +_((entity))_, and will be replaced by the character it encodes. + +{{index "backslash character", "ampersand character", "double-quote character"}} + +This is analogous to the way backslashes are used in +JavaScript strings. Since this mechanism gives ampersand characters a +special meaning, too, those need to be escaped as `&amp;`. Inside an +attribute, which is wrapped in double quotes, `&quot;` can be used to +insert an actual quote character. + +{{index "error tolerance", parsing}} + +HTML is parsed in a remarkably +error-tolerant way. When tags that should be there are missing, the +browser reconstructs them. The way in which this is done has been +standardized, and you can rely on all modern browsers to do it in the +same way. + +The following document will be treated just like the one shown previously: + +```text/html + + +My home page + +

    My home page

    +

    Hello, I am Marijn and this is my home page. +

    I also wrote a book! Read it + here. +``` + +{{index "title (HTML tag)", "head (HTML tag)", "body (HTML tag)", "html (HTML tag)"}} + +The ``, ``, and `` tags +are gone completely. The browser knows that `` belongs in a +head, and that `<h1>` in a body. Furthermore, I am no longer explicitly +closing the paragraphs since opening a new paragraph or ending the +document will close them implicitly. The quotes around the link target +are also gone. + +This book will usually omit the `<html>`, `<head>`, and `<body>` tags +from examples to keep them short and free of clutter. But I _will_ +close tags and include quotes around attributes. + +{{index browser}} + +I will also usually omit the ((doctype)). This is not to +be taken as an encouragement to omit doctype declarations. Browsers +will often do ridiculous things when you forget them. You should +consider doctypes implicitly present in examples, even when they are +not actually shown in the text. + +{{id script_tag}} +## HTML and JavaScript + +{{index [JavaScript, "in HTML"], "script (HTML tag)"}} + +In the context of this +book, the most important ((HTML)) tag is `<script>`. This tag allows +us to include a piece of JavaScript in a document. + +```text/html +<h1>Testing alert</h1> +<script>alert("hello!");</script> +``` + +{{index "alert function", timeline}} + +Such a script will run as soon as +its `<script>` tag is encountered as the browser reads the HTML. The +page shown earlier will pop up an `alert` dialog when opened. + +{{index "src attribute"}} + +Including large programs directly in HTML documents +is often impractical. The `<script>` tag can be given an `src` +attribute in order to fetch a script file (a text file containing a +JavaScript program) from a URL. + +```text/html +<h1>Testing alert</h1> +<script src="code/hello.js"></script> +``` + +The _code/hello.js_ file included here contains the same simple program, +`alert("hello!")`. 
When an HTML page references other URLs as part of +itself—for example, an image file or a script—web browsers will +retrieve them immediately and include them in the page. + +{{index "script (HTML tag)", "closing tag"}} + +A script tag must always be +closed with `</script>`, even if it refers to a script file and +doesn't contain any code. If you forget this, the rest of the page +will be interpreted as part of the script. + +{{index "button (HTML tag)", "onclick attribute"}} + +Some attributes can also +contain a JavaScript program. The `<button>` tag shown next (which shows up +as a button) has an `onclick` attribute, whose content will be run +whenever the button is clicked. + +```text/html +<button onclick="alert('Boom!');">DO NOT PRESS</button> +``` + +{{index "single-quote character", [escaping, "in HTML"]}} + +Note that I had to +use single quotes for the string in the `onclick` attribute because +double quotes are already used to quote the whole attribute. I could +also have used `&quot;`, but that'd make the program harder to read. + +## In the sandbox + +{{index "malicious script", "World Wide Web", browser, website, security}} + +Running programs +downloaded from the ((Internet)) is potentially dangerous. You do not +know much about the people behind most sites you visit, and they do +not necessarily mean well. Running programs by people who do not mean +well is how you get your computer infected by ((virus))es, your data +stolen, and your accounts hacked. + +Yet the attraction of the Web is that you can surf it without +necessarily ((trust))ing all the pages you visit. This is why browsers +severely limit the things a JavaScript program may do: it can't look +at the files on your computer or modify anything not related to the +web page it was embedded in. + +{{index isolation}} + +Isolating a programming environment in this way is +called _((sandbox))ing_, the idea being that the program is harmlessly +playing in a sandbox.
But you should imagine this particular kind of +sandbox as having a cage of thick steel bars over it, which makes it +somewhat different from your typical playground sandbox. + +The hard part of sandboxing is allowing the programs enough room to be +useful yet at the same time restricting them from doing anything +dangerous. Lots of useful functionality, such as communicating with +other servers or reading the content of the copy-paste ((clipboard)), +can also be used to do problematic, ((privacy))-invading things. + +{{index leak, exploit, security}} + +Every now and then, someone comes +up with a new way to circumvent the limitations of a ((browser)) and +do something harmful, ranging from leaking minor private information +to taking over the whole machine that the browser runs on. The browser +developers respond by fixing the hole, and all is well again—that is, +until the next problem is discovered, and hopefully publicized, rather +than secretly exploited by some government or ((mafia)). + +## Compatibility and the browser wars + +{{index Microsoft, "World Wide Web"}} + +In the early stages of the +Web, a browser called ((Mosaic)) dominated the market. After a few +years, the balance had shifted to ((Netscape)), which was then, in +turn, largely supplanted by Microsoft's ((Internet Explorer)). At any +point where a single ((browser)) was dominant, that browser's vendor +would feel entitled to unilaterally invent new features for the Web. +Since most users used the same browser, ((website))s would simply +start using those features—never mind the other browsers. + +This was the dark age of ((compatibility)), often called the +_((browser wars))_. Web developers were left with not one unified Web +but two or three incompatible platforms. To make things worse, the +browsers in use around 2003 were all full of ((bug))s, and of course +the bugs were different for each ((browser)). Life was hard for people +writing web pages. 
+ +{{index Apple, "Internet Explorer", Mozilla}} + +Mozilla ((Firefox)), a +not-for-profit offshoot of ((Netscape)), challenged Internet +Explorer's hegemony in the late 2000s. Because ((Microsoft)) was not +particularly interested in staying competitive at the time, Firefox +took quite a chunk of market share away from it. Around the same +time, ((Google)) introduced its ((Chrome)) browser, and Apple's +((Safari)) browser gained popularity, leading to a situation where +there were four major players, rather than one. + +{{index compatibility}} + +The new players had a more serious attitude toward +((standards)) and better ((engineering)) practices, leading to less +incompatibility and fewer ((bug))s. Microsoft, seeing its market share +crumble, came around and adopted these attitudes. If you are starting +to learn web development today, consider yourself lucky. The latest +versions of the major browsers behave quite uniformly and have +relatively few bugs. + +{{index "World Wide Web"}} + +That is not to say that the situation is perfect +just yet. Some of the people using the Web are, for reasons of inertia +or corporate policy, stuck with very old ((browser))s. Until those +browsers die out entirely, writing websites that work for them will +require a lot of arcane knowledge about their shortcomings and quirks. +This book is not about those ((quirks)). Rather, it aims to present +the modern, sane style of ((web programming)). + diff --git a/13_dom.md b/13_dom.md new file mode 100644 index 000000000..581deb8c9 --- /dev/null +++ b/13_dom.md @@ -0,0 +1,1289 @@ +{{meta {chap_num: 13, prev_link: 12_browser, next_link: 14_event, load_files: ["code/mountains.js", "code/chapter/13_dom.js"]}}} + +# The Document Object Model + +{{index drawing, parsing}} + +When you open a web page in your browser, the browser +retrieves the page's ((HTML)) text and parses it, much like the way +our parser from [Chapter 11](11_language.html#parsing) parsed +programs. 
The browser builds up a model of the document's +((structure)) and then uses this model to draw the page on the screen. + +{{index "live data structure"}} + +This representation of the ((document)) +is one of the toys that a JavaScript program has +available in its ((sandbox)). You can read from the model and also change it. It acts as a +_live_ data structure: when it is modified, the page on the screen is +updated to reflect the changes. + +## Document structure + +You can imagine an ((HTML)) document as a nested set of ((box))es. +Tags such as `<body>` and `</body>` enclose other ((tag))s, which in +turn contain other tags or ((text)). Here's the example document from +the [previous chapter](12_browser.html#browser): + +```text/html sandbox-homepage +<!doctype html> +<html> + <head> + <title>My home page + + +

    My home page

    +

    Hello, I am Marijn and this is my home page.

    +

    I also wrote a book! Read it + here.

    + + +``` + +This page has the following structure: + +{{figure {url: "img/html-boxes.svg", alt: "HTML document as nested boxes",width: "7cm"}}} + +{{indexsee "Document Object Model", DOM}} + +The data structure the browser uses to represent the document follows +this shape. For each box, there is an ((object)), which we can +interact with to find out things such as what HTML tag it represents and +which boxes and text it contains. This representation is called the +_Document Object Model_, or ((DOM)) for short. + +{{index "documentElement property", "head property", "body property", "html (HTML tag)", "body (HTML tag)", "head (HTML tag)"}} + +The global variable `document` gives us access to these +objects. Its `documentElement` property refers to the object +representing the `` tag. It also provides the properties `head` and +`body`, which hold the objects for those elements. + +## Trees + +{{index [nesting, "of objects"]}} + +Think back to the ((syntax tree))s from +[Chapter 11](11_language.html#parsing) for a moment. Their +structures are strikingly similar to the structure of a browser's +document. Each _((node))_ may refer to other nodes, _children_, which +in turn may have their own children. This shape is typical of nested +structures where elements can contain sub-elements that are similar to +themselves. + +{{index "documentElement property"}} + +We call a data structure a _((tree))_ +when it has a branching structure, has no ((cycle))s (a node may not +contain itself, directly or indirectly), and has a single, +well-defined “((root))”. In the case of the ((DOM)), +`document.documentElement` serves as the root. + +{{index sorting, "data structure", "syntax tree"}} + +Trees come up a lot +in computer science. 
In addition to representing recursive structures such as +HTML documents or programs, they are often used to maintain +sorted ((set))s of data because elements can usually be found or +inserted more efficiently in a sorted tree than in a sorted flat +array. + +{{index "leaf node", "Egg language"}} + +A typical tree has different kinds of +((node))s. The syntax tree for [the Egg +language](11_language.html#language) had variables, values, and application nodes. Application +nodes always have children, whereas variables and values are _leaves_, or +nodes without children. + +{{index "body property"}} + +The same goes for the DOM. Nodes for regular +_((element))s_, which represent ((HTML)) tags, determine the structure +of the document. These can have ((child node))s. An example of such a +node is `document.body`. Some of these children can be ((leaf node))s, +such as pieces of ((text)) or ((comment))s (comments are written between +`<!--` and `-->` in HTML). + +{{index "text node", "ELEMENT_NODE code", "COMMENT_NODE code", "TEXT_NODE code", "nodeType property"}} + +Each DOM node object +has a `nodeType` property, which contains a numeric code that +identifies the type of node. Regular elements have the value 1, which +is also defined as the constant property `document.ELEMENT_NODE`. Text +nodes, representing a section of text in the document, have the value +3 (`document.TEXT_NODE`). Comments have the value 8 +(`document.COMMENT_NODE`). + +So another way to visualize our document ((tree)) is as follows: + +{{figure {url: "img/html-tree.svg", alt: "HTML document as a tree",width: "8cm"}}} + +The leaves are text nodes, and the arrows indicate parent-child +relationships between nodes. + +{{id standard}} +## The standard + +{{index "programming language", [interface, design]}} + +Using cryptic numeric +codes to represent node types is not a very JavaScript-like thing to +do. Later in this chapter, we'll see that other parts of the +((DOM)) interface also feel cumbersome and alien.
The reason for this +is that the DOM wasn't designed for just JavaScript. Rather, it tries +to define a language-neutral ((interface)) that can be used in other +systems as well—not just HTML but also ((XML)), which is a generic +((data format)) with an HTML-like syntax. + +{{index consistency, integration}} + +This is unfortunate. Standards are +often useful. But in this case, the advantage (cross-language +consistency) isn't all that compelling. Having an interface that is +properly integrated with the language you are using will save you more +time than having a familiar interface across languages. + +{{index "array-like object", "NodeList type"}} + +As an example of such poor +integration, consider the `childNodes` property that element nodes in +the DOM have. This property holds an array-like object, with a +`length` property and properties labeled by numbers to access the +child nodes. But it is an instance of the `NodeList` type, not a real +array, so it does not have methods such as `slice` and `forEach`. + +{{index [interface, design], [DOM, construction], "side effect"}} + +Then +there are issues that are simply poor design. For example, there is no +way to create a new node and immediately add children or attributes to +it. Instead, you have to first create it, then add the children one by +one, and finally set the attributes one by one, using side effects. Code that +interacts heavily with the DOM tends to get long, repetitive, and +ugly. + +{{index library}} + +But these flaws aren't fatal. Since JavaScript +allows us to create our own ((abstraction))s, it is easy to write some +((helper function))s that allow you to express the operations you are +performing in a clearer and shorter way. In fact, many libraries +intended for browser programming come with such tools. + +## Moving through the tree + +{{index pointer}} + +DOM nodes contain a wealth of ((link))s to other nearby +nodes. 
The following diagram illustrates these: + +{{figure {url: "img/html-links.svg", alt: "Links between DOM nodes",width: "6cm"}}} + +{{index "child node", "parentNode property", "childNodes property"}} + +Although the diagram shows only one link of each type, +every node has a `parentNode` property that points to its containing +node. Likewise, every element node (node type 1) has a `childNodes` +property that points to an ((array-like object)) holding its children. + +{{index "firstChild property", "lastChild property", "previousSibling property", "nextSibling property"}} + +In theory, you could move +anywhere in the tree using just these parent and child links. But +JavaScript also gives you access to a number of additional convenience +links. The `firstChild` and `lastChild` properties point to the first +and last child elements or have the value `null` for nodes without +children. Similarly, `previousSibling` and `nextSibling` point to +adjacent nodes, which are nodes with the same parent that appear immediately +before or after the node itself. For a first child, `previousSibling` +will be null, and for a last child, `nextSibling` will be null. + +{{index "talksAbout function", recursion, [nesting, "of objects"]}} + +When +dealing with a nested data structure like this one, recursive functions +are often useful. 
The following recursive function scans a document for ((text node))s +containing a given string and returns `true` when it has found one: + +{{id talksAbout}} +```sandbox-homepage +function talksAbout(node, string) { + if (node.nodeType == document.ELEMENT_NODE) { + for (var i = 0; i < node.childNodes.length; i++) { + if (talksAbout(node.childNodes[i], string)) + return true; + } + return false; + } else if (node.nodeType == document.TEXT_NODE) { + return node.nodeValue.indexOf(string) > -1; + } +} + +console.log(talksAbout(document.body, "book")); +// → true +``` + +{{index "nodeValue property"}} + +The `nodeValue` property of a text node refers +to the string of text that it represents. + +## Finding elements + +{{index DOM, "body property", "hard-coding"}} + +Navigating these +((link))s among parents, children, and siblings is often useful, as in +the previous function, which runs through the whole document. But if we +want to find a specific node in the document, reaching it by starting +at `document.body` and blindly following a hard-coded path of links is +a bad idea. Doing so bakes assumptions into our program about the +precise structure of the document—a structure we might want to change +later. Another complicating factor is that text nodes are created even +for the ((whitespace)) between nodes. The example document's body tag +does not have just three children (`

    ` and two `

    ` elements) but +actually has seven: those three, plus the spaces before, after, and +between them. + +{{index searching, "href attribute", "getElementsByTagName method"}} + +So +if we want to get the `href` attribute of the link in that document, +we don't want to say something like “Get the second child of the sixth +child of the document body”. It'd be better if we could say “Get the +first link in the document”. And we can. + +```sandbox-homepage +var link = document.body.getElementsByTagName("a")[0]; +console.log(link.href); +``` + +{{index "child node"}} + +All element nodes have a `getElementsByTagName` +method, which collects all elements with the given tag name that are +descendants (direct or indirect children) of the given node and +returns them as an array-like object. + +{{index "id attribute", "getElementById method"}} + +To find a specific +_single_ node, you can give it an `id` attribute and use +`document.getElementById` instead. + +```text/html +

    My ostrich Gertrude:

    +

    + + +``` + +{{index "getElementsByClassName method", "class attribute"}} + +A third, +similar method is `getElementsByClassName`, which, like +`getElementsByTagName`, searches through the contents of an element +node and retrieves all elements that have the given string in their +`class` attribute. + +## Changing the document + +{{index "side effect", "removeChild method", "appendChild method", "insertBefore method", [DOM, construction]}} + +Almost +everything about the ((DOM)) data structure can be changed. Element +nodes have a number of methods that can be used to change their +content. The `removeChild` method removes the given child node from +the document. To add a child, we can use `appendChild`, which puts it +at the end of the list of children, or `insertBefore`, which inserts +the node given as the first argument before the node given as the second +argument. + +```text/html +

    One

    +

    Two

    +

    Three

    + + +``` + +A node can exist in the document in only one place. Thus, inserting +paragraph “Three” in front of paragraph “One” will first remove it +from the end of the document and then insert it at the front, +resulting in “Three/One/Two”. All operations that insert a node +somewhere will, as a ((side effect)), cause it to be removed from its +current position (if it has one). + +{{index "insertBefore method", "replaceChild method"}} + +The `replaceChild` +method is used to replace a child node with another one. It takes as +arguments two nodes: a new node and the node to be replaced. The +replaced node must be a child of the element the method is called on. +Note that both `replaceChild` and `insertBefore` expect the _new_ node +as their first argument. + +## Creating nodes + +{{index "alt attribute", "img (HTML tag)"}} + +In the following example, we +want to write a script that replaces all ((image))s (`<img>` tags) in +the document with the text held in their `alt` attributes, which +specify an alternative textual representation of the image. + +{{index "createTextNode method"}} + +This involves not only removing the images +but adding a new text node to replace them. For this, we use the +`document.createTextNode` method. + +```text/html +

    The Cat in the + Hat.

    + +

    + + +``` + +{{index "text node"}} + +Given a string, `createTextNode` gives us a type 3 DOM +node (a text node), which we can insert into the document to make it +show up on the screen. + +{{index "live data structure", "getElementsByTagName method", "childNodes property"}} + +The loop that goes over the images +starts at the end of the list of nodes. This is necessary because the +node list returned by a method like `getElementsByTagName` (or a +property like `childNodes`) is _live_. That is, it is updated as the +document changes. If we started from the front, removing the first +image would cause the list to lose its first element so that the +second time the loop repeats, where `i` is 1, it would stop because +the length of the collection is now also 1. + +{{index "slice method"}} + +If you want a _solid_ collection of nodes, as +opposed to a live one, you can convert the collection to a real array +by calling the array `slice` method on it. + +``` +var arrayish = {0: "one", 1: "two", length: 2}; +var real = Array.prototype.slice.call(arrayish, 0); +real.forEach(function(elt) { console.log(elt); }); +// → one +// two +``` + +{{index "createElement method"}} + +To create regular ((element)) nodes (type +1), you can use the `document.createElement` method. This method takes +a tag name and returns a new empty node of the given type. + +{{index "Popper, Karl", [DOM, construction], "elt function"}} + +{{id elt}} +The +following example defines a utility `elt`, which creates an element +node and treats the rest of its arguments as children to that node. +This function is then used to add a simple attribution to a quote. + +```text/html +
    + No book can ever be finished. While working on it we learn + just enough to find it immature the moment we turn away + from it. +
    + + +``` + +{{if book + +This is what the resulting document looks like: + +{{figure {url: "img/blockquote.png", alt: "A blockquote with attribution",width: "8cm"}}} + +if}} + +## Attributes + +{{index "href attribute"}} + +Some element ((attribute))s, such as `href` for +links, can be accessed through a ((property)) of the same name on the +element's ((DOM)) object. This is the case for a limited set of +commonly used standard attributes. + +{{index "data attribute", "getAttribute method", "setAttribute method"}} + +But HTML allows you to set any attribute you want on nodes. +This can be useful because it allows you to store extra information in a +document. If you make up your own attribute names, though, such +attributes will not be present as a property on the element's node. +Instead, you'll have to use the `getAttribute` and `setAttribute` +methods to work with them. + +```text/html +

    The launch code is 00000000.

    +

    I have two feet.

    + + +``` + +I recommend prefixing the names of such made-up attributes with +`data-` to ensure they do not conflict with any other +attributes. + +{{index "programming language", "syntax highlighting example"}} + +As a simple +example, we'll write a “syntax highlighter” that looks for `<pre>`
    +tags (“preformatted”, used for code and similar plaintext) with a
    +`data-language` attribute and crudely tries to highlight the
    +((keyword))s for that language.
    +
    +// include_code
    +
    +```sandbox-highlight
    +function highlightCode(node, keywords) {
    +  var text = node.textContent;
    +  node.textContent = ""; // Clear the node
    +
    +  var match, pos = 0;
    +  while (match = keywords.exec(text)) {
    +    var before = text.slice(pos, match.index);
    +    node.appendChild(document.createTextNode(before));
    +    var strong = document.createElement("strong");
    +    strong.appendChild(document.createTextNode(match[0]));
    +    node.appendChild(strong);
    +    pos = keywords.lastIndex;
    +  }
    +  var after = text.slice(pos);
    +  node.appendChild(document.createTextNode(after));
    +}
    +```
    +
    +{{index "pre (HTML tag)", "syntax highlighting example", "highlightCode function"}}
    +
    +The function `highlightCode` takes a `<pre>` node and a
    +((regular expression)) (with the “global” option turned on) that
    +matches the keywords of the programming language that the element
    +contains.
    +
    +{{index "strong (HTML tag)", clearing, "textContent property"}}
    +
    +The
    +`textContent` property is used to get all the ((text)) in the node
    +and is then set to an empty string, which has the effect of emptying
    +the node. We loop over all matches of the keyword expression,
    +appending the text _between_ them as regular text nodes, and the text
    +matched (the keywords) as text nodes wrapped in `<strong>` (bold) elements.
    +
    +{{index "data attribute", "getElementsByTagName method"}}
    +
    +We can
    +automatically highlight all programs on the page by looping over all
    +the `<pre>` elements that have a `data-language` attribute and
    +calling `highlightCode` on each one with the correct regular
    +expression for the language.
    +
    +// include_code
    +
    +```sandbox-highlight
    +var languages = {
    +  javascript: /\b(function|return|var)\b/g /* … etc */
    +};
    +
    +function highlightAllCode() {
    +  var pres = document.body.getElementsByTagName("pre");
    +  for (var i = 0; i < pres.length; i++) {
    +    var pre = pres[i];
    +    var lang = pre.getAttribute("data-language");
    +    if (languages.hasOwnProperty(lang))
    +      highlightCode(pre, languages[lang]);
    +  }
    +}
    +```
    +
    +{{index "syntax highlighting example"}}
    +
    +Here is an example:
    +
    +```text/html sandbox-highlight
    +<p>Here it is, the identity function:</p>
    +<pre data-language="javascript">
    +function id(x) { return x; }
    +</pre>
    + + +``` + +{{if book + +This produces a page that looks like this: + +{{figure {url: "img/highlighted.png", alt: "A highlighted piece of code",width: "4.8cm"}}} + +if}} + +{{index "getAttribute method", "setAttribute method", "className property", "class attribute"}} + +There is one commonly used attribute, +`class`, which is a ((reserved word)) in the JavaScript language. For +historical reasons—some old JavaScript implementations could not +handle property names that matched keywords or reserved words—the +property used to access this attribute is called `className`. You can +also access it under its real name, `"class"`, by using the +`getAttribute` and `setAttribute` methods. + +## Layout + +{{index layout, "block element", "inline element", "p (HTML tag)", "h1 (HTML tag)", "a (HTML tag)", "strong (HTML tag)"}} + +You +might have noticed that different types of elements are laid out +differently. Some, such as paragraphs (`

    `) or headings (`

    `), +take up the whole width of the document and are rendered on separate +lines. These are called _block_ elements. Others, such as links +(``) or the `` element used in the previous example, are +rendered on the same line with their surrounding text. Such elements +are called _inline_ elements. + +{{index drawing}} + +For any given document, browsers are able to compute a +layout, which gives each element a size and position based on its +type and content. This layout is then used to actually draw the +document. + +{{index "border (CSS)", "offsetWidth property", "offsetHeight property", "clientWidth property", "clientHeight property", dimensions}} + +The size and position of an element can be +accessed from JavaScript. The `offsetWidth` and `offsetHeight` +properties give you the space the element takes up in _((pixel))s_. A +pixel is the basic unit of measurement in the browser and typically +corresponds to the smallest dot that your screen can display. +Similarly, `clientWidth` and `clientHeight` give you the size of the +space _inside_ the element, ignoring border width. + +```text/html +

    + I'm boxed in +

    + + +``` + +{{if book + +Giving a paragraph a border causes a rectangle to be drawn around it. + +{{figure {url: "img/boxed-in.png", alt: "A paragraph with a border",width: "8cm"}}} + +if}} + +{{index "getBoundingClientRect method", position, "pageXOffset property", "pageYOffset property"}} + +{{id boundingRect}} +The most effective way to find +the precise position of an element on the screen is the +`getBoundingClientRect` method. It returns an object with `top`, +`bottom`, `left`, and `right` properties, indicating the pixel +positions of the sides of the element relative to the top left of the +screen. If you want them relative to the whole document, you must +add the current scroll position, found under the global `pageXOffset` +and `pageYOffset` variables. + +{{index "offsetHeight property", "getBoundingClientRect method", drawing, laziness, performance, efficiency}} + +Laying +out a document can be quite a lot of work. In the interest of speed, +browser engines do not immediately re-layout a document every time it +is changed but rather wait as long as they can. When a JavaScript +program that changed the document finishes running, the browser will +have to compute a new layout in order to display the changed document +on the screen. When a program _asks_ for the position or size of +something by reading properties such as `offsetHeight` or calling +`getBoundingClientRect`, providing correct information also requires +computing a ((layout)). + +{{index "side effect", optimization, benchmark}} + +A program that +repeatedly alternates between reading DOM layout information and +changing the DOM forces a lot of layouts to happen and will +consequently run really slowly. The following code shows an example of +this. It contains two different programs that build up a line of _X_ +characters 2,000 pixels wide and measures the time each one takes. + +{{test nonumbers}} + +```text/html +

    +

    + + +``` + +## Styling + +{{index "block element", "inline element", style, "strong (HTML tag)", "a (HTML tag)", underline}} + +We have seen that different +HTML elements display different behavior. Some are displayed as +blocks, others inline. Some add styling, such as `` making its +content ((bold)) and `
    ` making it blue and underlining it. + +{{index "img (HTML tag)", "default behavior", "style attribute"}} + +The way +an `` tag shows an image or an `` tag causes a link to be +followed when it is clicked is strongly tied to the element type. But +the default styling associated with an element, such as the text color +or underline, can be changed by us. Here is an example using the `style` +property: + +```text/html +

    Normal link

    +

    Green link

    +``` + +{{if book + +The second link will be green instead of the default link color. + +{{figure {url: "img/colored-links.png", alt: "A normal and a green link",width: "2.2cm"}}} + +if}} + +{{index "border (CSS)", "color (CSS)", CSS, "colon character"}} + +A +style attribute may contain one or more _((declaration))s_, which are +a property (such as `color`) followed by a colon and a value (such as +`green`). When there is more than one declaration, they must be +separated by ((semicolon))s, as in `"color: red; border: none"`. + +{{index "display (CSS)", layout}} + +There are a lot of aspects that can be +influenced by styling. For example, the `display` property controls +whether an element is displayed as a block or an inline element. + +```text/html +This text is displayed inline, +as a block, and +not at all. +``` + +{{index "hidden element"}} + +The `block` tag will end up on its own line since +((block element))s are not displayed inline with the text around them. +The last tag is not displayed at all—`display: none` prevents an +element from showing up on the screen. This is a way to hide elements. +It is often preferable to removing them from the document +entirely because it makes it easy to reveal them again at a later time. + +{{if book + +{{figure {url: "img/display.png", alt: "Different display styles",width: "4cm"}}} + +if}} + +{{index "color (CSS)", "style attribute"}} + +JavaScript code can directly +manipulate the style of an element through the node's `style` +property. This property holds an object that has properties for all +possible style properties. The values of these properties are strings, +which we can write to in order to change a particular aspect of the +element's style. + +```text/html +

    + Pretty text +

    + + +``` + +{{index "camel case", capitalization, "dash character", "font-family (CSS)"}} + +Some style property names contain dashes, such as `font-family`. +Because such property names are awkward to work with in JavaScript +(you'd have to say `style["font-family"]`), the property names in the +`style` object for such properties have their dashes removed and the +letters that follow them capitalized (`style.fontFamily`). + +## Cascading styles + +{{index "rule (CSS)", "style (HTML tag)"}} + +{{indexsee "Cascading Style Sheets", CSS}} + +The styling system for HTML is called ((CSS)) +for _Cascading Style Sheets_. A _((style sheet))_ is a set of +rules for how to style elements in a document. It can be given +inside a ` +

    Now strong text is italic and gray.

    +``` + +{{index "rule (CSS)", "font-weight (CSS)", overlay}} + +The _((cascading))_ in the name +refers to the fact that multiple such rules are combined to +produce the final style for an element. In the previous example, the +default styling for `` tags, which gives them `font-weight: +bold`, is overlaid by the rule in the ` + + +``` +if}} + +{{hint + +{{index "createElement method", "table example", "appendChild method"}} + +Use `document.createElement` to create new element nodes, +`document.createTextNode` to create text nodes, and the `appendChild` +method to put nodes into other nodes. + +You should loop over the key names once to fill in the top row and +then again for each object in the array to construct the data +rows. + +Don't forget to return the enclosing `` element at the end of +the function. + +hint}} + +### Elements by tag name + +{{index "getElementsByTagName method", recursion}} + +The +`getElementsByTagName` method returns all child elements with a given +tag name. Implement your own version of it as a regular nonmethod +function that takes a node and a string (the tag name) as arguments +and returns an array containing all descendant element nodes with the +given tag name. + +{{index "tagName property", capitalization, "toLowerCase method", "toUpperCase method"}} + +To find the tag name of an element, +use its `tagName` property. But note that this will return the tag +name in all uppercase. Use the `toLowerCase` or `toUpperCase` string +method to compensate for this. + +{{if interactive + +{{test no}} + +```text/html +

    Heading with a span element.

    +

    A paragraph with one, two + spans.

    + + +``` +if}} + +{{hint + +{{index "getElementsByTagName method", recursion}} + +The solution is most +easily expressed with a recursive function, similar to the +[`talksAbout` function](13_dom.html#talksAbout) defined earlier in +this chapter. + +{{index concatenation, "concat method", closure}} + +You could call +`byTagname` itself recursively, concatenating the resulting arrays to +produce the output. For a more efficient approach, define an inner +function that calls itself recursively and that has access to an +array variable defined in the outer function to which it can add the +matching elements it finds. Don't forget to call the ((inner +function)) once from the outer function. + +{{index "nodeType property", "ELEMENT_NODE code"}} + +The recursive function +must check the node type. Here we are interested only in node type 1 +(`document.ELEMENT_NODE`). For such nodes, we must loop over their +children and, for each child, see whether the child matches the query while also doing +a recursive call on it to inspect its own children. + +hint}} + +### The cat's hat + +{{index "cat's hat (exercise)"}} + +Extend the cat ((animation)) defined +[earlier](13_dom.html#animation) so that both the cat and his hat +(``) orbit at opposite sides of the ellipse. + +Or make the hat circle around the cat. Or alter the animation in some +other interesting way. + +{{index "absolute positioning", "top (CSS)", "left (CSS)", "position (CSS)"}} + +To make positioning multiple objects easier, it is probably a +good idea to switch to absolute positioning. This means that `top` and +`left` are counted relative to the top left of the document. To avoid +using negative coordinates, you can simply add a fixed number of +pixels to the position values. 
+ +{{if interactive + +{{test no}} + +```text/html + + + + +``` + +if}} + diff --git a/14_event.md b/14_event.md new file mode 100644 index 000000000..9f3f4f049 --- /dev/null +++ b/14_event.md @@ -0,0 +1,1238 @@ +{{meta {chap_num: 14, prev_link: 13_dom, next_link: 15_game}}} + +# Handling Events + +{{quote {author: "Marcus Aurelius,Meditations", chapter: true} + +You have power over your mind—not +outside events. Realize this, and you will find strength. + +quote}} + +{{index stoicism, "Marcus Aurelius", input, timeline, "control flow"}} + +Some programs work with direct user input, such as mouse and +keyboard interaction. The timing and order of such input can't be +predicted in advance. This requires a different approach to control +flow than the one we have used so far. + +## Event handlers + +{{index polling, button, "real-time"}} + +Imagine an interface where the +only way to find out whether a key on the keyboard is being pressed is to read the +current state of that key. To be able to react to keypresses, +you would have to constantly read the key's state so that +you'd catch it before it's released again. It would be dangerous to +perform other time-intensive computations since you might miss a +keypress. + +That is how such input was handled on primitive machines. A step +up would be for the hardware or operating system to notice the +keypress and put it in a queue. A program can then periodically check the +queue for new events and react to what it finds there. + +{{index responsiveness, "user experience"}} + +Of course, it has to remember +to look at the queue, and to do it often, because any time between the +key being pressed and the program noticing the event will cause the +software to feel unresponsive. This approach is called _((polling))_. +Most programmers avoid it whenever possible. + +{{index "callback function", "event handling"}} + +A better mechanism is for +the underlying system to give our code a chance to react +to events as they occur. 
Browsers do this by allowing us to register +functions as _handlers_ for specific events. + +```text/html +

    Click this document to activate the handler.

    + +``` + +{{index "click event", "addEventListener method"}} + +The `addEventListener` +function registers its second argument to be called whenever the event +described by its first argument occurs. + +## Events and DOM nodes + +{{index "addEventListener method", "event handling"}} + +Each ((browser)) +event handler is registered in a context. When you call +`addEventListener` as shown previously, you are calling it as a method on the +whole ((window)) because in the browser the ((global scope)) is +equivalent to the `window` object. Every ((DOM)) element has its own +`addEventListener` method, which allows you to listen specifically on +that element. + +```text/html + +

    No handler here.

    + +``` + +{{index "click event", "button (HTML tag)"}} + +The example attaches a handler +to the button node. Thus, clicks on the button cause that handler to +run, whereas clicks on the rest of the document do not. + +{{index "onclick attribute", encapsulation}} + +Giving a node an `onclick` +attribute has a similar effect. But a node has only one `onclick` +attribute, so you can register only one handler per node that way. The +`addEventListener` method allows you to add any number of handlers, so +you can't accidentally replace a handler that has already been +registered. + +{{index "removeEventListener method"}} + +The `removeEventListener` method, +called with arguments similar to as `addEventListener`, removes a +handler. + +```text/html + + +``` + +{{index [function, "as value"]}} + +To be able to unregister a handler function, we +give it a name (such as `once`) so that we +can pass it to both `addEventListener` and `removeEventListener`. + +## Event objects + +{{index "which property", "event handling"}} + +Though we have ignored it in +the previous examples, event handler functions are passed an argument: +the _((event object))_. This object gives us additional information +about the event. For example, if we want to know _which_ ((mouse +button)) was pressed, we can look at the event object's `which` property. + +```text/html + + +``` + +{{index "event type", "type property"}} + +The information stored in an event +object differs per type of event. We'll discuss various types later +in this chapter. The object's `type` property always holds a string +identifying the event (for example `"click"` or `"mousedown"`). + +## Propagation + +{{index "event propagation", "parent node"}} + +{{indexsee bubbling, "event propagation"}} + +{{indexsee propagation, "event propagation"}} + +Event handlers registered on +nodes with children will also receive some events that happen in the +children. 
If a button inside a paragraph is clicked, event handlers on +the paragraph will also receive the click event. + +{{index "event handling"}} + +But if both the paragraph and the button have a +handler, the more specific handler—the one on the button—gets to go +first. The event is said to _propagate_ outward, from the node where +it happened to that node's parent node and on to the root of the +document. Finally, after all handlers registered on a specific node +have had their turn, handlers registered on the whole ((window)) get a +chance to respond to the event. + +{{index "stopPropagation method", "click event"}} + +At any point, an event +handler can call the `stopPropagation` method on the event object to +prevent handlers “further up” from receiving the event. This can be +useful when, for example, you have a button inside another clickable +element and you don't want clicks on the button to activate the outer +element's click behavior. + +{{index "mousedown event"}} + +The following example registers `"mousedown"` +handlers on both a button and the paragraph around it. When clicked +with the right mouse button, the handler for the button calls +`stopPropagation`, which will prevent the handler on the paragraph +from running. When the button is clicked with another ((mouse +button)), both handlers will run. + +```text/html +

    A paragraph with a .

    + +``` + +{{index "event propagation", "target property"}} + +Most event objects have a +`target` property that refers to the node where they originated. You +can use this property to ensure that you're not accidentally handling +something that propagated up from a node you do not want to handle. + +It is also possible to use the `target` property to cast a wide net +for a specific type of event. For example, if you have a node +containing a long list of buttons, it may be more convenient to +register a single click handler on the outer node and have it use the +`target` property to figure out whether a button was clicked, rather +than register individual handlers on all of the buttons. + +```text/html + + + + +``` + +## Default actions + +{{index scrolling, "default behavior", "event handling"}} + +Many events +have a default action associated with them. If you click a ((link)), +you will be taken to the link's target. If you press the down arrow, +the browser will scroll the page down. If you right-click, you'll get +a context menu. And so on. + +{{index "preventDefault method"}} + +For most types of events, the JavaScript +event handlers are called _before_ the default behavior is performed. +If the handler doesn't want the normal behavior to happen, typically +because it has already taken care of handling the event, it can call +the `preventDefault` method on the event object. + +{{index expectation}} + +This can be used to implement your own ((keyboard)) +shortcuts or ((context menu)). It can also be used to obnoxiously +interfere with the behavior that users expect. For example, here is a +link that cannot be followed: + +```text/html +MDN + +``` + +Try not to do such things unless you have a really good reason to. For +people using your page, it can be unpleasant when the behavior +they expect is broken. + +Depending on the browser, some events can't be intercepted. 
On +Chrome, for example, ((keyboard)) shortcuts to close the current tab +(Ctrl-W or Command-W) cannot be handled by JavaScript. + +## Key events + +{{index keyboard, "keydown event", "keyup event", "event handling"}} + +When a key on the keyboard is pressed, your browser fires a +`"keydown"` event. When it is released, a `"keyup"` event fires. + +```text/html focus +

    This page turns violet when you hold the V key.

    + +``` + +{{index "repeating key"}} + +Despite its name, `"keydown"` fires not only +when the key is physically pushed down. When a key is pressed and +held, the event fires again every time the key _repeats_. +Sometimes—for example if you want to increase the acceleration of a +((game)) character when an arrow key is pressed and decrease it again +when the key is released—you have to be careful not to increase it +again every time the key repeats or you'd end up with unintentionally +huge values. + +{{index "keyCode property", "key code"}} + +The previous example looked at the +`keyCode` property of the event object. This is how you can identify +which key is being pressed or released. Unfortunately, it's not +always obvious how to translate the numeric key code to an actual +key. + +{{index "event object", "charCodeAt method"}} + +For letter and number keys, +the associated key code will be the ((Unicode)) character code +associated with the (uppercase) letter or number printed on the key. +The `charCodeAt` method on ((string))s gives us a way to find this +code. + +``` +console.log("Violet".charCodeAt(0)); +// → 86 +console.log("1".charCodeAt(0)); +// → 49 +``` + +Other keys have less predictable ((key code))s. The best way to find +the codes you need is usually by ((experiment))ing—register a key event +handler that logs the key codes it gets and press the key you are +interested in. + +{{index "modifier key", "shift key", "control key", "alt key", "meta key", "command key", "ctrlKey property", "shiftKey property", "altKey property", "metaKey property"}} + +Modifier keys +such as Shift, Ctrl, Alt, and Meta (Command on Mac) generate key +events just like normal keys. But when looking for key combinations, +you can also find out whether these keys are held down by looking +at the `shiftKey`, `ctrlKey`, `altKey`, and `metaKey` properties of +keyboard and mouse events. + +```text/html focus +

    Press Ctrl-Space to continue.

    + +``` + +{{index typing, "fromCharCode function", "charCode property", "keydown event", "keyup event", "keypress event"}} + +The +`"keydown"` and `"keyup"` events give you information about the +physical key that is being pressed. But what if you are interested in +the actual ((text)) being typed? Getting that text from key codes is +awkward. Instead, there exists another event, `"keypress"`, which +fires right after `"keydown"` (and repeated along with `"keydown"` +when the key is held) but only for keys that produce character input. +The `charCode` property in the event object contains a code that can +be interpreted as a ((Unicode)) character code. We can use the +`String.fromCharCode` function to turn this code into an +actual single-((character)) ((string)). + +```text/html focus +

    Focus this page and type something.

    + +``` + +{{index "button (HTML tag)", "tabindex attribute"}} + +The ((DOM)) node where +a key event originates depends on the element that has ((focus)) when +the key is pressed. Normal nodes cannot have focus (unless you give +them a `tabindex` attribute), but things such as ((link))s, buttons, and +form fields can. We'll come back to form ((field))s in +[Chapter 18](18_forms.html#forms). When nothing in particular has +focus, `document.body` acts as the target node of key events. + +## Mouse clicks + +{{index "mousedown event", "mouseup event", "mouse cursor"}} + +Pressing a +((mouse button)) also causes a number of events to fire. The +`"mousedown"` and `"mouseup"` events are similar to `"keydown"` and +`"keyup"` and fire when the button is pressed and released. +These will happen on the DOM nodes that are immediately below the +mouse pointer when the event occurs. + +{{index "click event"}} + +After the `"mouseup"` event, a `"click"` event +fires on the most specific node that contained both the press and the +release of the button. For example, if I press down the mouse button +on one paragraph and then move the pointer to another paragraph and +release the button, the `"click"` event will happen on the element +that contains both those paragraphs. + +{{index "dblclick event", "double click"}} + +If two clicks happen close +together, a `"dblclick"` (double-click) event also fires, after the +second click event. + +{{index pixel, "pageX property", "pageY property", "event object"}} + +To get precise information about the place where a mouse +event happened, you can look at its `pageX` and `pageY` properties, +which contain the event's ((coordinates)) (in pixels) relative to the +top-left corner of the document. + +{{index "border-radius (CSS)", "absolute positioning", "drawing program example"}} + +{{id mouse_drawing}} +The following implements a primitive drawing program. Every +time you click the document, it adds a dot under your mouse +pointer. 
See [Chapter 19](19_paint.html#paint) for a less primitive +drawing program. + +```text/html + + +``` + +{{index "clientX property", "clientY property", "getBoundingClientRect method", "event object"}} + +The `clientX` and `clientY` properties are +similar to `pageX` and `pageY` but relative to the part of the +document that is currently scrolled into view. These can be useful +when comparing mouse coordinates with the ((coordinates)) returned by +`getBoundingClientRect`, which also returns ((viewport))-relative +coordinates. + +## Mouse motion + +{{index "mousemove event"}} + +Every time the mouse pointer moves, a +`"mousemove"` event fires. This event can be used to track the +position of the mouse. A common situation in which this is useful is +when implementing some form of mouse-((dragging)) functionality. + +{{index "draggable bar example"}} + +As an example, the following program displays a +bar and sets up event handlers so that dragging to the left or right +on this bar makes it narrower or wider: + +```text/html +

    Drag the bar to change its width:

    +
    +
    + +``` + +{{if book + +The resulting page looks like this: + +{{figure {url: "img/drag-bar.png", alt: "A draggable bar",width: "5.3cm"}}} + +if}} + +{{index "mouseup event", "mousemove event"}} + +Note that the `"mousemove"` +handler is registered on the whole ((window)). Even if the mouse goes +outside of the bar during resizing, we still want to update its size +and stop dragging when the mouse is released. + +{{index "buttons property", "which property"}} + +We must stop resizing the +bar when the mouse button is released. Unfortunately, not all browsers +give `"mousemove"` events a meaningful `which` property. There is a +standard property called `buttons`, which provides similar +information, but that is also not supported on all browsers. +Fortunately, all major browsers support either `buttons` or `which`, +so the `buttonPressed` function in the example first tries `buttons`, +and falls back to `which` when that isn't available. + +{{index "mouseover event", "mouseout event"}} + +Whenever the mouse pointer +enters or leaves a node, a `"mouseover"` or `"mouseout"` event +fires. These two events can be used, among other things, to create +((hover effect))s, showing or styling something when the mouse is over +a given element. + +{{index "event propagation"}} + +Unfortunately, creating such an effect is not +as simple as starting the effect on `"mouseover"` and ending it on +`"mouseout"`. When the mouse moves from a node onto one of its +children, `"mouseout"` fires on the parent node, though the mouse +did not actually leave the node's extent. To make things worse, these +events propagate just like other events, and thus you will also +receive `"mouseout"` events when the mouse leaves one of the ((child +node))s of the node on which the handler is registered. + +{{index "isInside function", "relatedTarget property", "target property"}} + +To work around this problem, we can use the `relatedTarget` +property of the event objects created for these events. 
It tells us, +in the case of `"mouseover"`, what element the pointer was over +before and, in the case of `"mouseout"`, what element it is going to. +We want to change our hover effect only when the `relatedTarget` is +outside of our target node. Only in that case does this event actually +represent a _crossing over_ from outside to inside the node (or the +other way around). + +```text/html +

    Hover over this paragraph.

    + +``` + +The `isInside` function follows the given node's parent links until it +either reaches the top of the document (when `node` becomes null) or +finds the parent we are looking for. + +I should add that a ((hover effect)) like this can be much more easily +achieved using the ((CSS)) _((pseudoselector))_ `:hover`, as the next +example shows. But when your hover effect involves doing something +more complicated than changing a style on the target node, you must use the trick +with `"mouseover"` and `"mouseout"` events. + +```text/html + +

    Hover over this paragraph.

    +``` + +## Scroll events + +{{index scrolling, "scroll event", "event handling"}} + +Whenever an +element is scrolled, a `"scroll"` event fires on it. This has +various uses, such as knowing what the user is currently looking at +(for disabling off-screen ((animation))s or sending ((spy)) reports to +your evil headquarters) or showing some indication of progress (by +highlighting part of a table of contents or showing a page number). + +The following example draws a ((progress bar)) in the top-right corner of +the document and updates it to fill up as you scroll down: + +```text/html + +
    +

    Scroll me...

    + +``` + +{{index "unit (CSS)", scrolling, "position (CSS)", "fixed positioning", "absolute positioning", percent}} + +Giving an element +a `position` of `fixed` acts much like an `absolute` position but +also prevents it from scrolling along with the rest of the document. +The effect is to make our progress bar stay in its corner. Inside +it is another element, which is resized to indicate the current +progress. We use `%`, rather than `px`, as a unit when setting the +width so that the element is sized relative to the whole bar. + +{{index "innerHeight property", "innerWidth property", "pageYOffset property"}} + +The global `innerHeight` variable gives us the height of +the window, which we have to subtract from the total scrollable +height—you can't keep scrolling when you hit the bottom of the +document. (There's also an `innerWidth` to go along with +`innerHeight`.) By dividing `pageYOffset`, the current scroll +position, by the maximum scroll position and multiplying by 100, +we get the percentage for the progress bar. + +{{index "preventDefault method"}} + +Calling `preventDefault` on a scroll event +does not prevent the scrolling from happening. In fact, the event +handler is called only _after_ the scrolling takes place. + +## Focus events + +{{index "event handling", "focus event", "blur event"}} + +When an element +gains ((focus)), the browser fires a `"focus"` event on it. When it +loses focus, a `"blur"` event fires. + +{{index "event propagation"}} + +Unlike the events discussed earlier, these two +events do not propagate. A handler on a parent element is not notified +when a child element gains or loses focus. + +{{index "input (HTML tag)", "help text example"}} + +The following example +displays help text for the ((text field)) that currently has +focus: + +```text/html +

    Name:

    +

    Age:

    +

    + + +``` + +{{if book + +In the following screenshot, the help text for the age field is shown. + +{{figure {url: "img/help-field.png", alt: "Providing help when a field is focused",width: "4.4cm"}}} + +if}} + +{{index "focus event", "blur event"}} + +The ((window)) object will receive +`"focus"` and `"blur"` events when the user moves from or to the +browser tab or window in which the document is shown. + +## Load event + +{{index "script (HTML tag)", "load event"}} + +When a page finishes loading, +the `"load"` event fires on the window and the document body +objects. This is often used to schedule ((initialization)) actions +that require the whole ((document)) to have been built. Remember that +the content of ` +``` + +{{index "clearTimeout function"}} + +Sometimes you need to cancel a function you +have scheduled. This is done by storing the value returned by +`setTimeout` and calling `clearTimeout` on it. + +``` +var bombTimer = setTimeout(function() { + console.log("BOOM!"); +}, 500); + +if (Math.random() < 0.5) { // 50% chance + console.log("Defused."); + clearTimeout(bombTimer); +} +``` + +{{index "cancelAnimationFrame function", "requestAnimationFrame function"}} + +The `cancelAnimationFrame` function works in the same way +as _clearTimeout_—calling it on a value returned by +`requestAnimationFrame` will cancel that frame (assuming it hasn't +already been called). + +{{index "setInterval function", "clearInterval function", repetition}} + +A similar set of functions, `setInterval` +and `clearInterval` are used to set timers that should repeat every _X_ +milliseconds. 
+ +``` +var ticks = 0; +var clock = setInterval(function() { + console.log("tick", ticks++); + if (ticks == 10) { + clearInterval(clock); + console.log("stop."); + } +}, 200); +``` + +## Debouncing + +{{index optimization, "mousemove event", "scroll event", blocking}} + +Some types of events have the potential to fire +rapidly, many times in a row (the `"mousemove"` and `"scroll"` events, +for example). When handling such events, you must be careful not to do +anything too time-consuming or your handler will take up so much time +that interaction with the document starts to feel slow and choppy. + +{{index "setTimeout function"}} + +If you do need to do something nontrivial in +such a handler, you can use `setTimeout` to make sure you are not +doing it too often. This is usually called _((debouncing))_ the event. +There are several slightly different approaches to this. + +{{index "textarea (HTML tag)", "clearTimeout function", "keydown event"}} + +In the first example, we want to do something when the user +has typed something, but we don't want to do it immediately for every +key event. When they are ((typing)) quickly, we just want to wait +until a pause occurs. Instead of immediately performing an action in +the event handler, we set a timeout instead. We also clear the +previous timeout (if any) so that when events occur close together +(closer than our timeout delay), the timeout from the previous event +will be canceled. + +```text/html + + +``` + +{{index "sloppy programming"}} + +Giving an undefined value to `clearTimeout` or +calling it on a timeout that has already fired has no effect. Thus, we +don't have to be careful about when to call it, and we simply do so +for every event. + +{{index "mousemove event"}} + +We can use a slightly different pattern if we +want to space responses so that they're separated by at least a +certain length of ((time)) but want to fire them _during_ a series of +events, not just afterward. 
For example, we might want to respond to +`"mousemove"` events by showing the current coordinates of the mouse, +but only every 250 milliseconds. + +```text/html + +``` + +## Summary + +Event handlers make it possible to detect and react to events we have +no direct control over. The `addEventListener` method is used to +register such a handler. + +Each event has a type (`"keydown"`, `"focus"`, and so on) that identifies +it. Most events are called on a specific DOM element and then +_propagate_ to that element's ancestors, allowing handlers associated +with those elements to handle them. + +When an event handler is called, it is passed an event object with +additional information about the event. This object also has methods +that allow us to stop further propagation (`stopPropagation`) and +prevent the browser's default handling of the event +(`preventDefault`). + +Pressing a key fires `"keydown"`, `"keypress"`, and `"keyup"` events. +Pressing a mouse button fires `"mousedown"`, `"mouseup"`, and +`"click"` events. Moving the mouse fires `"mousemove"` and possibly +`"mouseenter"` and `"mouseout"` events. + +Scrolling can be detected with the `"scroll"` event, and focus changes +can be detected with the `"focus"` and `"blur"` events. When the document finishes +loading, a `"load"` event fires on the window. + +Only one piece of JavaScript program can run at a time. Thus, event +handlers and other scheduled scripts have to wait until other scripts +finish before they get their turn. + +## Exercises + +### Censored keyboard + +{{index Turkish, Kurds, "censored keyboard (exercise)"}} + +Between 1928 +and 2013, Turkish law forbade the use of the letters _Q_, _W_, and _X_ +in official documents. This was part of a wider initiative to stifle +Kurdish culture—those letters occur in the language used by Kurdish +people but not in Istanbul Turkish. 
+ +{{index typing, "input (HTML tag)"}} + +As an exercise in doing ridiculous +things with technology, I'm asking you to program a ((text field)) (an +`<input>` tag) that these letters cannot be typed into. + +{{index clipboard}} + +(Do not worry about copy and paste and other such +loopholes.) + +{{if interactive + +{{test no}} + +```text/html + + +``` + +if}} + +{{hint + +{{index "keypress event", "keydown event", "preventDefault method", "censored keyboard (exercise)"}} + +The solution to this +exercise involves preventing the ((default behavior)) of key events. +You can handle either `"keypress"` or `"keydown"`. If either of them +has `preventDefault` called on it, the letter will not appear. + +{{index "keyCode property", "charCode property", capitalization}} + +Identifying the letter typed requires +looking at the `keyCode` or `charCode` property and comparing that +with the codes for the letters you want to filter. In `"keydown"`, you +do not have to worry about lowercase and uppercase letters, since it +identifies only the key pressed. If you decide to handle `"keypress"` +instead, which identifies the actual character typed, you have to make +sure you test for both cases. One way to do that would be this: + +```null +/[qwx]/i.test(String.fromCharCode(event.charCode)) +``` + +hint}} + +### Mouse trail + +{{index animation, "mouse trail (exercise)"}} + +In JavaScript's early days, +which was the high time of ((gaudy home pages)) with lots of animated +images, people came up with some truly inspiring ways to use the +language. + +One of these was the “mouse trail”—a series of images that would +follow the mouse pointer as you moved it across the page. + +{{index "absolute positioning", "background (CSS)"}} + +In this exercise, I +want you to implement a mouse trail. Use absolutely positioned `
    ` +elements with a fixed size and background color (refer to the +[code](14_event.html#mouse_drawing) in the “Mouse Clicks” +section for an example). Create a bunch of such elements and, when the +mouse moves, display them in the wake of the mouse pointer. + +{{index "mousemove event"}} + +There are various possible approaches here. You +can make your solution as simple or as complex as you want. A simple +solution to start with is to keep a fixed number of trail elements and +cycle through them, moving the next one to the mouse's current +position every time a `"mousemove"` event occurs. + +{{if interactive + +{{test no}} + +```text/html + + + +``` + +if}} + +{{hint + +{{index "mouse trail (exercise)"}} + +Creating the elements is best done in a +loop. Append them to the document to make them show up. To be +able to access them later to change their position, store the trail +elements in an array. + +{{index "mousemove event", [array, indexing], "remainder operator", "% operator"}} + +Cycling through them can be done by keeping a ((counter +variable)) and adding 1 to it every time the `"mousemove"` event +fires. The remainder operator (`% 10`) can then be used to get a valid +array index to pick the element you want to position during a given +event. + +{{index simulation, "requestAnimationFrame function"}} + +Another +interesting effect can be achieved by modeling a simple ((physics)) +system. Use the `"mousemove"` event only to update a pair of variables +that track the mouse position. Then use `requestAnimationFrame` to +simulate the trailing elements being attracted to the position of the +mouse pointer. At every animation step, update their position based on +their position relative to the pointer (and, optionally, a speed that +is stored for each element). Figuring out a good way to do this is up +to you. + +hint}} + +### Tabs + +{{index "tabbed interface (exercise)"}} + +A tabbed interface is a common design +pattern. 
It allows you to select an interface panel by choosing from +a number of tabs “sticking out” above an element. + +{{index "button (HTML tag)", "display (CSS)", "hidden element", "data attribute"}} + +In this exercise you'll implement a simple tabbed +interface. Write a function, `asTabs`, that takes a DOM node and +creates a tabbed interface showing the child elements of that node. It +should insert a list of `
    ` element. This nicely corresponds to +the structure of the `grid` property in the level—each row of the grid +is turned into a table row (`<tr>` element). The strings in the grid +are used as class names for the table cell (`
    `) elements. The +following CSS helps the resulting table look like the background we +want: + +```text/css +.background { background: rgb(52, 166, 251); + table-layout: fixed; + border-spacing: 0; } +.background td { padding: 0; } +.lava { background: rgb(255, 100, 100); } +.wall { background: white; } +``` + +{{index "padding (CSS)"}} + +Some of these (`table-layout`, `border-spacing`, +and `padding`) are simply used to suppress unwanted default behavior. +We don't want the layout of the ((table)) to depend upon the contents +of its cells, and we don't want space between the ((table)) cells or +padding inside them. + +{{index "background (CSS)", "rgb (CSS)", CSS}} + +The `background` rule +sets the background color. CSS allows colors to be specified both as +words (`white`) and with a format such as `rgb(R, G, B)`, where the red, +green, and blue components of the color are separated into three +numbers from 0 to 255. So, in `rgb(52, 166, 251)`, the red component is +52, green is 166, and blue is 251. Since the blue component is the +largest, the resulting color will be bluish. You can see that in the +`.lava` rule, the first number (red) is the largest. + +We draw each ((actor)) by creating a ((DOM)) element for it and +setting that element's position and size based on the actor's properties. The +values have to be multiplied by `scale` to go from game units to +pixels. + +// include_code + +``` +DOMDisplay.prototype.drawActors = function() { + var wrap = elt("div"); + this.level.actors.forEach(function(actor) { + var rect = wrap.appendChild(elt("div", + "actor " + actor.type)); + rect.style.width = actor.size.x * scale + "px"; + rect.style.height = actor.size.y * scale + "px"; + rect.style.left = actor.pos.x * scale + "px"; + rect.style.top = actor.pos.y * scale + "px"; + }); + return wrap; +}; +``` + +{{index "position (CSS)", "class attribute"}} + +To give an element more than one +class, we separate the class names by spaces. 
In the +((CSS)) code shown next, the `actor` class gives the actors their +absolute position. Their type name is used as an extra class to give +them a color. We don't have to define the `lava` class again because we reuse +the class for the lava grid squares which we defined earlier. + +```text/css +.actor { position: absolute; } +.coin { background: rgb(241, 229, 89); } +.player { background: rgb(64, 64, 64); } +``` + +{{index graphics, optimization, efficiency}} + +When it updates the +display, the `drawFrame` method first removes the old actor graphics, +if any, and then redraws them in their new positions. It may be +tempting to try to reuse the ((DOM)) elements for actors, but to make +that work, we would need a lot of additional information flow between +the display code and the simulation code. We'd need to associate +actors with DOM elements, and the ((drawing)) code must remove +elements when their actors vanish. Since there will typically be only +a handful of actors in the game, redrawing all of them is not +expensive. + +// include_code + +``` +DOMDisplay.prototype.drawFrame = function() { + if (this.actorLayer) + this.wrap.removeChild(this.actorLayer); + this.actorLayer = this.wrap.appendChild(this.drawActors()); + this.wrap.className = "game " + (this.level.status || ""); + this.scrollPlayerIntoView(); +}; +``` + +{{index level, "class attribute", "style sheet"}} + +By adding the level's +current status as a class name to the wrapper, we can style the player +actor slightly differently when the game is won or lost by adding a +((CSS)) rule that takes effect only when the player has an ((ancestor +element)) with a given class. + +```text/css +.lost .player { + background: rgb(160, 64, 64); +} +.won .player { + box-shadow: -4px -7px 8px white, 4px -7px 8px white; +} +``` + +{{index player, "box shadow (CSS)"}} + +After touching ((lava)), the +player's color turns dark red, suggesting scorching. 
When the last +coin has been collected, we use two blurred white box shadows, one to the top +left and one to the top right, to create a white halo effect. + +{{id viewport}} + +{{index "position (CSS)", "max-width (CSS)", "overflow (CSS)", "max-height (CSS)", viewport}} + +We can't assume that +levels always fit in the viewport. That is why the +`scrollPlayerIntoView` call is needed—it ensures that if the level is +protruding outside the viewport, we scroll that viewport to make +sure the player is near its center. The following ((CSS)) gives the +game's wrapping ((DOM)) element a maximum size and ensures that +anything that sticks out of the element's box is not visible. We also give the outer element a relative +position so that the actors inside it are positioned relative to +the level's top-left corner. + +```text/css +.game { + overflow: hidden; + max-width: 600px; + max-height: 450px; + position: relative; +} +``` + +{{index scrolling}} + +In the `scrollPlayerIntoView` method, we find the +player's position and update the wrapping element's scroll position. +We change the scroll position by manipulating that element's `scrollLeft` +and `scrollTop` properties when the player is too close to the edge. 
+ +// include_code + +``` +DOMDisplay.prototype.scrollPlayerIntoView = function() { + var width = this.wrap.clientWidth; + var height = this.wrap.clientHeight; + var margin = width / 3; + + // The viewport + var left = this.wrap.scrollLeft, right = left + width; + var top = this.wrap.scrollTop, bottom = top + height; + + var player = this.level.player; + var center = player.pos.plus(player.size.times(0.5)) + .times(scale); + + if (center.x < left + margin) + this.wrap.scrollLeft = center.x - margin; + else if (center.x > right - margin) + this.wrap.scrollLeft = center.x + margin - width; + if (center.y < top + margin) + this.wrap.scrollTop = center.y - margin; + else if (center.y > bottom - margin) + this.wrap.scrollTop = center.y + margin - height; +}; +``` + +{{index center, coordinates, readability}} + +The way the player's +center is found shows how the methods on our `Vector` type allow +computations with objects to be written in a readable way. To +find the actor's center, we add its position (its top-left corner) and +half its size. That is the center in level coordinates, but we need it +in pixel coordinates, so we then multiply the resulting vector by our +display scale. + +{{index validation}} + +Next, a series of checks verify that the player +position isn't outside of the allowed range. Note that sometimes this +will set nonsense scroll coordinates, below zero or beyond the +element's scrollable area. This is okay—the DOM will constrain them to +sane values. Setting `scrollLeft` to -10 will cause it to become 0. + +It would have been slightly simpler to always try to scroll the player +to the center of the ((viewport)). But this creates a rather jarring +effect. As you are jumping, the view will constantly shift up and +down. It is more pleasant to have a “neutral” area in the middle of +the screen where you can move around without causing any scrolling. 
+ +{{index "cleaning up"}} + +Finally, we'll need a way to clear a displayed level, +to be used when the game moves to the next level or resets a level. + +// include_code + +``` +DOMDisplay.prototype.clear = function() { + this.wrap.parentNode.removeChild(this.wrap); +}; +``` + +{{index [game, screenshot]}} + +We are now able to display our tiny level. + +```text/html + + + +``` + +{{if book + +{{figure {url: "img/game_simpleLevel.png", alt: "Our level rendered",width: "7cm"}}} + +if}} + +{{index "link (HTML tag)", "style sheet", CSS}} + +The `<link>` tag, when used +with `rel="stylesheet"`, is a way to load a CSS file into a page. The +file `game.css` contains the styles necessary for our game. + +## Motion and collision + +{{index physics, animation}} + +Now we're at the point where we can start +adding motion—the most interesting aspect of the game. The basic +approach, taken by most games like this, is to split ((time)) into +small steps and, for each step, move the actors by a distance +corresponding to their speed (distance moved per second) multiplied by +the size of the time step (in seconds). + +{{index obstacle, "collision detection"}} + +That is easy. The difficult +part is dealing with the interactions between the elements. When the +player hits a wall or floor, they should not simply move through it. +The game must notice when a given motion causes an object to hit +another object and respond accordingly. For walls, the motion must be +stopped. For coins, the coin must be collected, and so on. + +Solving this for the general case is a big task. You can find +libraries, usually called _((physics engine))s_, that simulate +interaction between physical objects in two or three ((dimensions)). +We'll take a more modest approach in this chapter, handling only +collisions between rectangular objects and handling them in a rather simplistic +way. 
+ +{{index bouncing, "collision detection", animation}} + +Before moving +the ((player)) or a block of ((lava)), we test whether the motion +would take it inside of a nonempty part of the ((background)). If it +does, we simply cancel the motion altogether. The response to such a +collision depends on the type of actor—the player will stop, whereas a +lava block will bounce back. + +{{index discretization}} + +This approach requires our ((time)) steps to be +rather small since it will cause motion to stop before the objects +actually touch. If the time steps (and thus the motion steps) are too +big, the player would end up hovering a noticeable distance above the +ground. Another approach, arguably better but more complicated, would +be to find the exact collision spot and move there. We will take the +simple approach and hide its problems by ensuring the animation +proceeds in small steps. + +{{index obstacle, "obstacleAt method", "collision detection"}} + +This +method tells us whether a ((rectangle)) (specified by a position and a +size) overlaps with any nonempty space on the background grid: + +// include_code + +``` +Level.prototype.obstacleAt = function(pos, size) { + var xStart = Math.floor(pos.x); + var xEnd = Math.ceil(pos.x + size.x); + var yStart = Math.floor(pos.y); + var yEnd = Math.ceil(pos.y + size.y); + + if (xStart < 0 || xEnd > this.width || yStart < 0) + return "wall"; + if (yEnd > this.height) + return "lava"; + for (var y = yStart; y < yEnd; y++) { + for (var x = xStart; x < xEnd; x++) { + var fieldType = this.grid[y][x]; + if (fieldType) return fieldType; + } + } +}; +``` + +{{index "Math.floor function", "Math.ceil function"}} + +This method computes the set +of grid squares that the body ((overlap))s with by using `Math.floor` +and `Math.ceil` on the body's ((coordinates)). Remember that ((grid)) squares +are 1×1 units in size. By ((rounding)) the sides of a box up and +down, we get the range of ((background)) squares that the box touches. 
+ +{{figure {url: "img/game-grid.svg", alt: "Finding collisions on a grid",width: "3cm"}}} + +If the body sticks out of the level, we always return `"wall"` for the +sides and top and `"lava"` for the bottom. This ensures that the +player dies when falling out of the world. When the body is fully +inside the grid, we loop over the block of ((grid)) squares found by +((rounding)) the ((coordinates)) and return the content of the first +nonempty square we find. + +{{index coin, lava, "collision detection"}} + +Collisions between the +((player)) and other dynamic ((actor))s (coins, moving lava) are +handled _after_ the player moved. When the motion has taken the player +into another actor, the appropriate effect—collecting a coin or +dying—is activated. + +{{index "actorAt method"}} + +This method scans the array of actors, +looking for an actor that overlaps the one given as an argument: + +// include_code + +``` +Level.prototype.actorAt = function(actor) { + for (var i = 0; i < this.actors.length; i++) { + var other = this.actors[i]; + if (other != actor && + actor.pos.x + actor.size.x > other.pos.x && + actor.pos.x < other.pos.x + other.size.x && + actor.pos.y + actor.size.y > other.pos.y && + actor.pos.y < other.pos.y + other.size.y) + return other; + } +}; +``` + +{{id actors}} +## Actors and actions + +{{index "animate method", animation, keyboard}} + +The `animate` method +on the `Level` type gives all actors in the level a chance to move. +Its `step` argument is the ((time)) step in seconds. The `keys` object +contains information about the arrow keys the player has pressed. 
+ +// include_code + +``` +var maxStep = 0.05; + +Level.prototype.animate = function(step, keys) { + if (this.status != null) + this.finishDelay -= step; + + while (step > 0) { + var thisStep = Math.min(step, maxStep); + this.actors.forEach(function(actor) { + actor.act(thisStep, this, keys); + }, this); + step -= thisStep; + } +}; +``` + +{{index level, animation}} + +When the level's `status` property has a +non-null value (which is the case when the player has won or lost), we +must count down the `finishDelay` property, which tracks the time +between the point where winning or losing happens and the point where +we want to stop showing the level. + +{{index "while loop", discretization}} + +The `while` loop cuts the time +step we are animating into suitably small pieces. It ensures that no +step larger than `maxStep` is taken. For example, a `step` of 0.12 +second would be cut into two steps of 0.05 seconds and one step of 0.02. + +{{index actor, "Lava type", lava}} + +Actor objects have an `act` +method, which takes as arguments the time step, the level object, and +the `keys` object. Here is one, for the `Lava` actor type, +which ignores the `keys` object: + +// include_code + +``` +Lava.prototype.act = function(step, level) { + var newPos = this.pos.plus(this.speed.times(step)); + if (!level.obstacleAt(newPos, this.size)) + this.pos = newPos; + else if (this.repeatPos) + this.pos = this.repeatPos; + else + this.speed = this.speed.times(-1); +}; +``` + +{{index bouncing, multiplication, "Vector type", "collision detection"}} + +It computes a new position by adding the product of the +((time)) step and its current speed to its old position. If no +obstacle blocks that new position, it moves there. If there is an +obstacle, the behavior depends on the type of the ((lava)) +block—dripping lava has a `repeatPos` property, to which it jumps back +when it hits something. 
Bouncing lava simply inverts its speed +(multiplies it by -1) in order to start moving in the other direction. + +{{index "Coin type", coin, wave}} + +Coins use their `act` method to +wobble. They ignore collisions since they are simply wobbling around +inside of their own square, and collisions with the ((player)) will be +handled by the _player_'s `act` method. + +// include_code + +``` +var wobbleSpeed = 8, wobbleDist = 0.07; + +Coin.prototype.act = function(step) { + this.wobble += step * wobbleSpeed; + var wobblePos = Math.sin(this.wobble) * wobbleDist; + this.pos = this.basePos.plus(new Vector(0, wobblePos)); +}; +``` + +{{index "Math.sin function", sine, phase}} + +The `wobble` property is +updated to track time and then used as an argument to `Math.sin` to +create a ((wave)), which is used to compute a new position. + +{{index "collision detection", "Player type"}} + +That leaves the ((player)) +itself. Player motion is handled separately per ((axis)) because +hitting the floor should not prevent horizontal motion, and hitting a +wall should not stop falling or jumping motion. This method implements +the horizontal part: + +// include_code + +``` +var playerXSpeed = 7; + +Player.prototype.moveX = function(step, level, keys) { + this.speed.x = 0; + if (keys.left) this.speed.x -= playerXSpeed; + if (keys.right) this.speed.x += playerXSpeed; + + var motion = new Vector(this.speed.x * step, 0); + var newPos = this.pos.plus(motion); + var obstacle = level.obstacleAt(newPos, this.size); + if (obstacle) + level.playerTouched(obstacle); + else + this.pos = newPos; +}; +``` + +{{index animation, keyboard}} + +The horizontal motion is computed based on the state +of the left and right arrow keys. When a motion causes the player to +hit something, the level's `playerTouched` method, which handles +things like dying in ((lava)) and collecting ((coin))s, is called. +Otherwise, the object updates its position. 
+ +Vertical motion works in a similar way but has to simulate +((jumping)) and ((gravity)). + +// include_code + +``` +var gravity = 30; +var jumpSpeed = 17; + +Player.prototype.moveY = function(step, level, keys) { + this.speed.y += step * gravity; + var motion = new Vector(0, this.speed.y * step); + var newPos = this.pos.plus(motion); + var obstacle = level.obstacleAt(newPos, this.size); + if (obstacle) { + level.playerTouched(obstacle); + if (keys.up && this.speed.y > 0) + this.speed.y = -jumpSpeed; + else + this.speed.y = 0; + } else { + this.pos = newPos; + } +}; +``` + +{{index acceleration, physics}} + +At the start of the method, the player +is accelerated vertically to account for ((gravity)). The gravity, +((jumping)) speed, and pretty much all other ((constant))s in this +game have been set by ((trial and error)). I tested various values +until I found a combination I liked. + +{{index "collision detection", keyboard, jumping}} + +Next, we check for +obstacles again. If we hit an obstacle, there are two possible +outcomes. When the up arrow is pressed _and_ we are moving down +(meaning the thing we hit is below us), the speed is set to a +relatively large, negative value. This causes the player to jump. If +that is not the case, we simply bumped into something, and the speed +is reset to zero. + +The actual `act` method looks like this: + +// include_code + +``` +Player.prototype.act = function(step, level, keys) { + this.moveX(step, level, keys); + this.moveY(step, level, keys); + + var otherActor = level.actorAt(this); + if (otherActor) + level.playerTouched(otherActor.type, otherActor); + + // Losing animation + if (level.status == "lost") { + this.pos.y += step; + this.size.y -= step; + } +}; +``` + +{{index player}} + +After moving, the method checks for other actors that the +player is colliding with and again calls `playerTouched` when it +finds one. 
This time, it passes the actor object as the second argument +because if the other actor is a ((coin)), `playerTouched` needs to +know _which_ coin is being collected. + +{{index animation}} + +Finally, when the player dies (touches lava), we set up +a little animation that causes them to “shrink” or “sink” down by +reducing the height of the player object. + +{{index "collision detection"}} + +And here is the method that handles +collisions between the player and other objects: + +// include_code + +``` +Level.prototype.playerTouched = function(type, actor) { + if (type == "lava" && this.status == null) { + this.status = "lost"; + this.finishDelay = 1; + } else if (type == "coin") { + this.actors = this.actors.filter(function(other) { + return other != actor; + }); + if (!this.actors.some(function(actor) { + return actor.type == "coin"; + })) { + this.status = "won"; + this.finishDelay = 1; + } + } +}; +``` + +When ((lava)) is touched, the game's status is set to `"lost"`. When a +coin is touched, that ((coin)) is removed from the array of actors, +and if it was the last one, the game's status is set to `"won"`. + +This gives us a level that can actually be animated. All that is +missing now is the code that _drives_ the animation. + +## Tracking keys + +{{index keyboard}} + +For a ((game)) like this, we do not want keys to take +effect once per keypress. Rather, we want their effect (moving the player +figure) to continue happening as long as they are pressed. + +{{index "preventDefault method"}} + +We need to set up a key handler that stores +the current state of the left, right, and up arrow keys. We will also want +to call `preventDefault` for those keys so that they don't end up +((scrolling)) the page. 
+ +{{index "trackKeys function", "key code", "event handling", "addEventListener method"}} + +The following function, when given +an object with key codes as property names and key names as values, +will return an object that tracks the current position of those keys. +It registers event handlers for `"keydown"` and `"keyup"` events and, +when the key code in the event is present in the set of codes that it +is tracking, updates the object. + +// include_code + +``` +var arrowCodes = {37: "left", 38: "up", 39: "right"}; + +function trackKeys(codes) { + var pressed = Object.create(null); + function handler(event) { + if (codes.hasOwnProperty(event.keyCode)) { + var down = event.type == "keydown"; + pressed[codes[event.keyCode]] = down; + event.preventDefault(); + } + } + addEventListener("keydown", handler); + addEventListener("keyup", handler); + return pressed; +} +``` + +{{index "keydown event", "keyup event"}} + +Note how the same handler function +is used for both event types. It looks at the event object's `type` +property to determine whether the key state should be updated to true +(`"keydown"`) or false (`"keyup"`). + +{{id runAnimation}} +## Running the game + +{{index "requestAnimationFrame function", animation}} + +The +`requestAnimationFrame` function, which we saw in +[Chapter 13](13_dom.html#animationFrame), provides a good way to +animate a game. But its interface is quite primitive—using it requires +us to track the time at which our function was called the last time +around and call `requestAnimationFrame` again after every frame. + +{{index "runAnimation function", "callback function", [function, "as value"], [function, "higher-order"]}} + +Let's define a helper function that +wraps those boring parts in a convenient interface and allows us to +simply call `runAnimation`, giving it a function that expects a time +difference as an argument and draws a single frame. When the frame +function returns the value `false`, the animation stops. 
+ +// include_code + +``` +function runAnimation(frameFunc) { + var lastTime = null; + function frame(time) { + var stop = false; + if (lastTime != null) { + var timeStep = Math.min(time - lastTime, 100) / 1000; + stop = frameFunc(timeStep) === false; + } + lastTime = time; + if (!stop) + requestAnimationFrame(frame); + } + requestAnimationFrame(frame); +} +``` + +{{index time, discretization}} + +I have set a maximum frame step of 100 +milliseconds (one-tenth of a second). When the browser tab or window +with our page is hidden, `requestAnimationFrame` calls will be +suspended until the tab or window is shown again. In this case, the difference +between `lastTime` and `time` will be the entire time in which the +page was hidden. Advancing the game by that much in a single step will +look silly and might be a lot of work (remember the time-splitting in +the [`animate` method](15_game.html#actors)). + +The function also converts the time steps to seconds, which are an +easier quantity to think about than milliseconds. + +{{index "callback function", "runLevel function"}} + +The `runLevel` function +takes a `Level` object, a constructor for a ((display)), and, +optionally, a function. It displays the level (in `document.body`) and +lets the user play through it. When the level is finished (lost or +won), `runLevel` clears the display, stops the ((animation)), and, if an +`andThen` function was given, calls that function with the level's status. + +// include_code + +``` +var arrows = trackKeys(arrowCodes); + +function runLevel(level, Display, andThen) { + var display = new Display(document.body, level); + runAnimation(function(step) { + level.animate(step, arrows); + display.drawFrame(step); + if (level.isFinished()) { + display.clear(); + if (andThen) + andThen(level.status); + return false; + } + }); +} +``` + +{{index "runGame function"}} + +A game is a sequence of ((level))s. Whenever the +((player)) dies, the current level is restarted. 
When a level is +completed, we move on to the next level. This can be expressed by the +following function, which takes an array of level plans (arrays of +strings) and a ((display)) constructor: + +// include_code + +``` +function runGame(plans, Display) { + function startLevel(n) { + runLevel(new Level(plans[n]), Display, function(status) { + if (status == "lost") + startLevel(n); + else if (n < plans.length - 1) + startLevel(n + 1); + else + console.log("You win!"); + }); + } + startLevel(0); +} +``` + +{{index [function, "higher-order"], [function, "as value"]}} + +These functions show +a peculiar style of programming. Both `runAnimation` and `runLevel` +are higher-order functions but are not in the style we saw in +[Chapter 5](05_higher_order.html#higher_order). The function +argument is used to arrange things to happen at some time in the +future, and neither of the functions returns anything useful. Their +task is, in a way, to schedule actions. Wrapping these actions in +functions gives us a way to store them as a value so that they can be +called at the right moment. + +{{index "asynchronous programming", "event handling"}} + +This programming +style is usually called _asynchronous_ programming. Event handling is +also an instance of this style, and we will see much more of it when working +with tasks that can take an arbitrary amount of ((time)), such as +((network)) requests in [Chapter 17](17_http.html#http) and input +and output in general in [Chapter 20](20_node.html#node). + +{{index game, "GAME_LEVELS data set"}} + +There is a set of +((level)) plans available in the `GAME_LEVELS` variable (!book (downloadable from +http://eloquentjavascript.net/code#15[_eloquentjavascript.net/code#15_])!). +This page feeds them to `runGame`, starting an actual game: + +{{startCode}} + +[sandbox="null"] +[focus="yes"] +```text/html + + + + + +``` + +{{if interactive + +See if you can beat those. I had quite a lot of fun building them. 
+ +if}} + +## Exercises + +### Game over + +{{index "lives (exercise)", game}} + +It's traditional for ((platform game))s +to have the player start with a limited number of _lives_ and +subtract one life each time they die. When the player is out of lives, the game +restarts from the beginning. + +{{index "runGame function"}} + +Adjust `runGame` to implement lives. Have the +player start with three. + +{{if interactive + +{{test no}} + +[focus="yes"] +```text/html + + + + + +``` + +if}} + +{{hint + +{{index "lives (exercise)", "runGame function"}} + +The most obvious solution +would be to make `lives` a variable that lives in `runGame` and is +thus visible to the `startLevel` ((closure)). + +Another approach, which fits nicely with the spirit of the rest of the +function, would be to add a second ((parameter)) to `startLevel` that +gives the number of lives. When the whole ((state)) of a system is stored +in the arguments to a ((function)), calling that function provides an +elegant way to transition to a new state. + +In any case, when a ((level)) is lost, there should now be two +possible state transitions. If that was the last life, we go back to +level zero with the starting amount of lives. If not, we repeat the +current level with one less life remaining. + +hint}} + +### Pausing the game + +{{index "pausing (exercise)", "escape key", keyboard}} + +Make it possible +to pause (suspend) and unpause the game by pressing the Esc key. + +{{index "runLevel function", "event handling"}} + +This can be done by +changing the `runLevel` function to use another keyboard event +handler and interrupting or resuming the animation whenever the +Esc key is hit. + +{{index "runAnimation function"}} + +The `runAnimation` interface may not look +like it is suitable for this at first glance, but it is, if you +rearrange the way `runLevel` calls it. + +{{index [variable, global], "trackKeys function"}} + +When you have that +working, there is something else you could try. 
The way we have been +registering keyboard event handlers is somewhat problematic. The +`arrows` object is currently a global variable, and its event handlers +are kept around even when no game is running. You could say they _((leak))_ out of +our system. Extend `trackKeys` to provide a way to +unregister its handlers, and then change `runLevel` to register its +handlers when it starts and unregister them again when it is +finished. + +{{if interactive + +{{test no}} + +[focus="yes"] +```text/html + + + + + +``` + +if}} + +{{hint + +{{index "pausing (exercise)"}} + +An ((animation)) can be interrupted by +returning `false` from the function given to `runAnimation`. It can be +continued by calling `runAnimation` again. + +{{index closure}} + +To communicate that the animation should be +interrupted to the function passed to `runAnimation` so that it can +return `false`, you can use a variable that both the event handler and +that function have access to. + +{{index "event handling", "removeEventListener method", [function, "as value"]}} + +When finding a way to unregister the handlers registered by +`trackKeys`, remember that the _exact_ same function value that was +passed to `addEventListener` must be passed to `removeEventListener` +to successfully remove a handler. Thus, the `handler` function value +created in `trackKeys` must be available to the code that unregisters +the handlers. + +You can add a property to the object returned by `trackKeys`, +containing either that function value or a method that handles the +unregistering directly. + +hint}} + diff --git a/16_canvas.md b/16_canvas.md new file mode 100644 index 000000000..f54c789d1 --- /dev/null +++ b/16_canvas.md @@ -0,0 +1,1635 @@ +{{meta {chap_num: 16, prev_link: 15_game, next_link: 17_http, load_files: ["code/chapter/15_game.js", "code/game_levels.js", "code/chapter/16_canvas.js"], zip: "html include=[\"img/player.png\", \"img/sprites.png\"]"}}} + +# Drawing on Canvas + +{{quote {author: "M.C. 
Escher, cited by Bruno Ernst in The Magic Mirror of M.C. Escher", chapter: true} + +Drawing is deception. + +quote}} + +{{index "Escher, M.C.", CSS, "transform (CSS)"}} + +Browsers give us +several ways to display ((graphics)). The simplest way is to use styles to +position and color regular ((DOM)) elements. This can +get you quite far, as the game in the [previous chapter](15_game.html#game) +showed. By adding partially transparent background ((image))s to the +nodes, we can make them look exactly the way we want. It is even +possible to rotate or skew nodes by using the `transform` style. + +But we'd be using the DOM for something that it wasn't originally +designed for. Some tasks, such as drawing a ((line)) between +arbitrary points, are extremely awkward to do with regular +((HTML)) elements. + +{{index SVG, "img (HTML tag)"}} + +There are two alternatives. The first is DOM-based +but utilizes _Scalable Vector Graphics (SVG)_, rather than HTML +elements. Think of SVG as a dialect for describing +((document))s that focuses on ((shape))s rather than text. You can embed an SVG +document in an HTML document, or you can include it +through an `<img>` tag. + +{{index clearing}} + +The second alternative is called a _((canvas))_. A +canvas is a single ((DOM)) element that encapsulates a ((picture)). It +provides a programming ((interface)) for drawing ((shape))s onto the +space taken up by the node. The main difference between a canvas and +an SVG picture is that in SVG the original description of the shapes +is preserved so that they can be moved or resized at any time. +A canvas, on the other hand, converts the shapes to ((pixel))s (colored +dots on a raster) as soon as they are drawn and does not remember +what these pixels represent. The only way to move a shape on a canvas +is to clear the canvas (or the part of the canvas around the shape) and redraw it +with the shape in a new position. 
+ +## SVG + +This book will not go into ((SVG)) in detail, but I will briefly + explain how it works. At the +[end of the chapter](16_canvas.html#graphics_tradeoffs), I'll come +back to the trade-offs that you must consider when deciding which +((drawing)) mechanism is appropriate for a given application. + +This is an HTML document with a simple SVG ((picture)) in it: + +```text/html sandbox-svg +

    Normal HTML here.

    + + + + +``` + +{{index "circle (SVG tag)", "rect (SVG tag)", "XML namespace", XML, "xmlns attribute"}} + +The `xmlns` attribute changes an element (and its +children) to a different _XML namespace_. This namespace, identified +by a ((URL)), specifies the dialect that we are currently speaking. +The `<circle>` and `<rect>` tags, which do not exist in HTML, do have +a meaning in SVG—they draw shapes using the style and position +specified by their attributes. + +{{if book + +The document is displayed like this: + +{{figure {url: "img/svg-demo.png", alt: "An embedded SVG image",width: "4.5cm"}}} + +if}} + +These tags create ((DOM)) elements, just like ((HTML)) tags. For +example, this changes the `<circle>` element to be ((color))ed cyan +instead: + +```sandbox-svg +var circle = document.querySelector("circle"); +circle.setAttribute("fill", "cyan"); +``` + +## The canvas element + +{{index [canvas, size], "canvas (HTML tag)"}} + +Canvas ((graphics)) can be drawn +onto a `<canvas>` element. You can give such an element `width` and +`height` attributes to determine its size in ((pixel))s. + +A new canvas is empty, meaning it is entirely ((transparent)) and +thus shows up simply as empty space in the document. + +{{index "2d (canvas context)", "webgl (canvas context)", OpenGL, [canvas, context], dimensions}} + +The `<canvas>` +tag is intended to support different styles of ((drawing)). To get +access to an actual drawing ((interface)), we first need to create a +_((context))_, which is an object whose methods provide the drawing +interface. There are currently two widely supported drawing styles: +`"2d"` for two-dimensional graphics and `"webgl"` for +three-dimensional graphics through the OpenGL interface. + +{{index rendering, graphics, efficiency}} + +This book won't discuss +WebGL. We stick to two dimensions. But if you are interested in +three-dimensional graphics, I do encourage you to look into WebGL. 
It +provides a very direct interface to modern graphics hardware and thus +allows you to render even complicated scenes efficiently, using +JavaScript. + +{{index "getContext method", [canvas, context]}} + +A ((context)) is created +through the `getContext` method on the `<canvas>` element. + +```text/html +

    Before canvas.

    + +

    After canvas.

    + +``` + +After creating the context object, the example draws a red +((rectangle)) 100 ((pixel))s wide and 50 pixels high, with its top-left +corner at coordinates (10,10). + +{{if book + +{{figure {url: "img/canvas_fill.png", alt: "A canvas with a rectangle",width: "2.5cm"}}} + +if}} + +{{index SVG, coordinates}} + +Just like in ((HTML)) (and SVG), the +coordinate system that the canvas uses puts (0,0) at the top-left +corner, and the positive y-((axis)) goes down from there. So (10,10) +is 10 pixels below and to the right of the top-left corner. + +{{id fill_stroke}} +## Filling and stroking + +{{index filling, stroking, drawing, SVG}} + +In the ((canvas)) interface, +a shape can be _filled_, meaning its area is given a certain color or pattern, +or it can be _stroked_, which means a ((line)) is drawn along its edge. The +same terminology is used by SVG. + +{{index "fillRect method", "strokeRect method"}} + +The `fillRect` method fills +a ((rectangle)). It takes first the x- and y-((coordinates)) of the +rectangle's top-left corner, then its width, and then its height. A +similar method, `strokeRect`, draws the ((outline)) of a rectangle. + +{{index property, state}} + +Neither method takes any further parameters. +The color of the fill, thickness of the stroke, and so on are not +determined by an argument to the method (as you might justly expect) +but rather by properties of the context object. + +{{index filling, "fillStyle property"}} + +Setting `fillStyle` changes the way shapes are +filled. It can be set to a string that specifies a ((color)), and any +color understood by ((CSS)) can also be used here. + +{{index stroking, "line width", "strokeStyle property", "lineWidth property", canvas}} + +The `strokeStyle` property works similarly but +determines the color used for a stroked line. The width of that line +is determined by the `lineWidth` property, which may contain any +positive number. 
+ +```text/html + + +``` + +{{if book + +This code draws two blue squares, using a thicker line for the second +one. + +{{figure {url: "img/canvas_stroke.png", alt: "Two stroked squares",width: "5cm"}}} + +if}} + +{{index "default value", [canvas, size]}} + +When no `width` or `height` +attribute is specified, as in the previous example, a canvas element +gets a default width of 300 pixels and height of 150 pixels. + +## Paths + +{{index [path, canvas], [interface, design], [canvas, path]}} + +A path is a +sequence of ((line))s. The 2D canvas interface takes a peculiar +approach to describing such a path. It is done entirely through +((side effect))s. Paths are not values that can be stored and +passed around. Instead, if you want to do something with a path, you +make a sequence of method calls to describe its shape. + +```text/html + + +``` + +{{index canvas, "stroke method", "lineTo method", "moveTo method", shape}} + +This example creates a path with a number of +horizontal ((line)) segments and then strokes it using the `stroke` +method. Each segment created with `lineTo` starts at the path's +_current_ position. That position is usually the end of the last segment, +unless `moveTo` was called. In that case, the next segment would start +at the position passed to `moveTo`. + +{{if book + +The path described by the previous program looks like this: + +{{figure {url: "img/canvas_path.png", alt: "Stroking a number of lines",width: "2.1cm"}}} + +if}} + +{{index [path, canvas], filling, [path, closing], "fill method"}} + +When +filling a path (using the `fill` method), each ((shape)) is filled +separately. A path can contain multiple shapes—each `moveTo` motion +starts a new one. But the path needs to be _closed_ (meaning its start and +end are in the same position) before it can be filled. If the path is not +already closed, a line is added from its end to its +start, and the shape enclosed by the completed path is filled. 
+ +```text/html + + +``` + +This example draws a filled triangle. Note that only two of the triangle's +sides are explicitly drawn. The third, from the bottom-right corner +back to the top, is implied and won't be there when you stroke the +path. + +{{if book + +{{figure {url: "img/canvas_triangle.png", alt: "Filling a path",width: "2.2cm"}}} + +if}} + +{{index "stroke method", "closePath method", [path, closing], canvas}} + +You could also use the `closePath` method +to explicitly close a path by adding an actual ((line)) segment back to +the path's start. This segment _is_ drawn when stroking the path. + +## Curves + +{{index [path, canvas], canvas, drawing}} + +A path may also contain ((curve))d +((line))s. These are, unfortunately, a bit more involved to draw than +straight lines. + +{{index "quadraticCurveTo method"}} + +The `quadraticCurveTo` method draws a +curve to a given point. To determine the curvature of the line, the method is +given a ((control point)) as well as a destination point. +Imagine this control point as _attracting_ the line, giving the line its +curve. The line won't go through the control point. Rather, the +direction of the line at its start and end points will be such that it +aligns with the line from there to the control point. The following +example illustrates this: + +```text/html + + +``` + +{{if book + +It produces a path that looks like this: + +{{figure {url: "img/canvas_quadraticcurve.png", alt: "A quadratic curve",width: "2.3cm"}}} + +if}} + +{{index "stroke method"}} + +We draw a ((quadratic curve)) from the left to the +right, with (60,10) as control point, and then draw two ((line)) +segments going through that control point and back to the start of +the line. The result somewhat resembles a _((Star Trek))_ insignia. You +can see the effect of the control point: the lines leaving the lower +corners start off in the direction of the control point and then +((curve)) toward their target. 
+ +{{index canvas, "bezierCurveTo method"}} + +The `bezierCurveTo` method draws a +similar kind of curve. Instead of a single ((control point)), this one +has two—one for each of the ((line))'s endpoints. Here is a similar sketch to +illustrate the behavior of such a curve: + +```text/html + + +``` + +The two control points specify the direction at both ends of the +curve. The further they are away from their corresponding point, the +more the curve will “bulge” in that direction. + +{{if book + +{{figure {url: "img/canvas_beziercurve.png", alt: "A bezier curve",width: "2.2cm"}}} + +if}} + +{{index "trial and error"}} + +Such ((curve))s can be hard to work with—it's +not always clear how to find the ((control point))s that provide the +((shape)) you are looking for. Sometimes you can compute +them, and sometimes you'll just have to find a suitable value by trial +and error. + +{{index rounding, canvas, "arcTo method", arc}} + +_Arcs_—fragments of a +((circle))—are easier to reason about. The `arcTo` method +takes no less than five arguments. The first four arguments act +somewhat like the arguments to _quadraticCurveTo_. The first pair +provides a sort of ((control point)), and the second pair gives the +line's destination. The fifth argument provides the ((radius)) of the +arc. The method will conceptually project a corner—a line going to the +control point and then to the destination point—and round the corner's point so +that it forms part of a circle with the given radius. The `arcTo` method then draws +the rounded part, as well as a line from the starting position to the +start of the rounded part. + +```text/html + + +``` + +{{if book + +This produces two rounded corners with different radii. 
+ +{{figure {url: "img/canvas_arc.png", alt: "Two arcs with different radii",width: "2.3cm"}}} + +if}} + +{{index canvas, "arcTo method", "lineTo method"}} + +The `arcTo` method +won't draw the line from the end of the rounded part to the goal +position, though the word _to_ in its name would suggest it does. You +can follow up with a call to `lineTo` with the same goal coordinates +to add that part of the line. + +{{index "arc method", arc}} + +To draw a ((circle)), you could use four +calls to `arcTo` (each turning 90 degrees). But the `arc` method +provides a simpler way. It takes a pair of ((coordinates)) for the +arc's center, a radius, and then a start and end angle. + +{{index pi, "Math.PI constant"}} + +Those last two parameters make it +possible to draw only part of a circle. The ((angle))s are measured in +((radian))s, not ((degree))s. This means a full ((circle)) has an +angle of 2π, or `2 * Math.PI`, which is about 6.28. The angle starts counting at +the point to the right of the circle's center and goes clockwise from +there. You can use a start of 0 and an end bigger than 2π (say, 7) +to draw a full circle. + +```text/html + + +``` + +{{index "moveTo method", "arc method", [path, canvas]}} + +The resulting picture +contains a ((line)) from the right of the full circle (first call to +`arc`) to the right of the quarter-((circle)) (second call). Like other +path-drawing methods, a line drawn with `arc` is connected to the +previous path segment by default. You'd have to call `moveTo` or +start a new path if you want to avoid this. + +{{if book + +{{figure {url: "img/canvas_circle.png", alt: "Drawing a circle",width: "4.9cm"}}} + +if}} + +{{id pie_chart}} +## Drawing a pie chart + +{{index "pie chart example"}} + +Imagine you've just taken a ((job)) at +EconomiCorp, Inc., and your first assignment is to draw a pie chart of +their customer satisfaction ((survey)) results. 
+ +The `results` variable contains an array of objects that represent the +survey responses. + +// include_code + +```sandbox-pie +var results = [ + {name: "Satisfied", count: 1043, color: "lightblue"}, + {name: "Neutral", count: 563, color: "lightgreen"}, + {name: "Unsatisfied", count: 510, color: "pink"}, + {name: "No comment", count: 175, color: "silver"} +]; +``` + +{{index "pie chart example"}} + +To draw a pie chart, we draw a number of pie +slices, each made up of an ((arc)) and a pair of ((line))s to the center +of that arc. We can compute the ((angle)) taken up by each arc by dividing +a full circle (2π) by the total number of responses and then +multiplying that number (the angle per response) by the number of +people who picked a given choice. + +```text/html sandbox-pie + + +``` + +{{if book + +This draws the following chart: + +{{figure {url: "img/canvas_pie_chart.png", alt: "A pie chart",width: "5cm"}}} + +if}} + +But a chart that doesn't tell us what it means isn't very helpful. We +need a way to draw text to the ((canvas)). + +## Text + +{{index stroking, filling, "fillStyle property", "fillText method", "strokeText method"}} + +A 2D canvas drawing context provides +the methods `fillText` and `strokeText`. The latter can be useful for +outlining letters, but usually `fillText` is what you need. It will +fill the given ((text)) with the current `fillStyle`. + +```text/html + + +``` + +You can specify the size, style, and ((font)) of the text with the +`font` property. This example just gives a font size and family name. +You can add `italic` or `bold` to the start of the string to select a +style. + +{{index "fillText method", "strokeText method", "textAlign property", "textBaseline property"}} + +The last two arguments to +`fillText` (and `strokeText`) provide the position at which the text +is drawn. 
By default, they indicate the position of the start of the +text's alphabetic baseline, which is the line that letters “stand” on, not +counting hanging parts in letters like _j_ or _p_. You can change the horizontal +position by setting the `textAlign` property to `"end"` +or `"center"` and the vertical position by setting `textBaseline` to +`"top"`, `"middle"`, or `"bottom"`. + +{{index "pie chart example"}} + +We will come back to our pie chart, and the +problem of ((label))ing the slices, in the +[exercises](16_canvas.html#exercise_pie_chart) at the end of the +chapter. + +## Images + +{{index "vector graphics", "bitmap graphics"}} + +In computer ((graphics)), a +distinction is often made between _vector_ graphics and _bitmap_ +graphics. The first is what we have been doing so far in this +chapter—specifying a picture by giving a logical description of +((shape))s. Bitmap graphics, on the other hand, don't specify actual +shapes but rather work with ((pixel)) data (rasters of colored dots). + +{{index "load event", "event handling", "img (HTML tag)", "drawImage method"}} + +The `drawImage` method allows us to draw ((pixel)) data onto +a ((canvas)). This pixel data can originate from an `<img>` element or +from another canvas, and neither has to be visible in the actual +document. The following example creates a detached `<img>` element and +loads an image file into it. But it cannot immediately start drawing +from this picture because the browser may not have fetched it yet. To +deal with this, we register a `"load"` event handler and do the +drawing after the image has loaded. + +```text/html + + +``` + +{{index "drawImage method", scaling}} + +By default, `drawImage` will draw +the image at its original size. You can also give it two additional +arguments to dictate a different width and height. + +When `drawImage` is given _nine_ arguments, it can be used to draw +only a fragment of an image. 
The second through fifth arguments indicate the +rectangle (x, y, width, and height) in the source image that should be +copied, and the sixth to ninth arguments give the rectangle (on the +canvas) into which it should be copied. + +{{index "player character", "pixel art"}} + +This can be used to pack multiple +_((sprite))s_ (image elements) into a single image file and then +draw only the part you need. For example, we have this picture containing a +game character in multiple ((pose))s: + +{{figure {url: "img/player_big.png", alt: "Various poses of a game character",width: "6cm"}}} + +By alternating which pose we draw, we can show an ((animation)) that +looks like a walking character. + +{{index "fillRect method", "clearRect method", clearing}} + +To animate +the ((picture)) on a ((canvas)), the `clearRect` method is useful. It +resembles `fillRect`, but instead of coloring the rectangle, it makes +it ((transparent)), removing the previously drawn pixels. + +{{index "setInterval function", "img (HTML tag)"}} + +We know that each +_((sprite))_, each subpicture, is 24 ((pixel))s wide and 30 pixels +high. The following code loads the image and then sets up an interval +(repeated timer) to draw the next _((frame))_: + +```text/html + + +``` + +{{index "remainder operator", "% operator"}} + +The `cycle` variable tracks +our position in the ((animation)). Each ((frame)), it is incremented +and then clipped back to the 0 to 7 range by using the remainder +operator. This variable is then used to compute the x-coordinate that +the sprite for the current pose has in the picture. + +## Transformation + +{{index transformation, mirroring}} + +{{indexsee flipping, mirroring}} + +But what if we want our character to +walk to the left instead of to the right? We could add another set of +sprites, of course. But we can also instruct the ((canvas)) to draw +the picture the other way round. 
+ +{{index "scale method", scaling}} + +Calling the `scale` method will cause +anything drawn after it to be scaled. This method takes two parameters, one to +set a horizontal scale and one to set a vertical scale. + +```text/html + + +``` + +{{if book + +Due to the call to `scale`, the circle is drawn three times as wide +and half as high. + +{{figure {url: "img/canvas_scale.png", alt: "A scaled circle",width: "6.6cm"}}} + +if}} + +{{index mirroring}} + +Scaling will cause everything about the drawn image, including the +((line width)), to be stretched out or squeezed together as specified. +Scaling by a negative amount will flip the picture around. The +flipping happens around point (0,0), which means it will also +flip the direction of the coordinate system. When a horizontal scaling +of -1 is applied, a shape drawn at x position 100 will end up at what +used to be position -100. + +{{index "drawImage method"}} + +So to turn a picture around, we can't simply +add `cx.scale(-1, 1)` before the call to `drawImage` since that would +move our picture outside of the ((canvas)), where it won't be visible. +You could adjust the ((coordinates)) given to +`drawImage` to compensate for this by drawing the image at x position -50 +instead of 0. Another solution, which doesn't require the code that does +the drawing to know about the scale change, is to adjust the ((axis)) +around which the scaling happens. + +{{index "rotate method", "translate method", transformation}} + +There are several +other methods besides `scale` that influence the coordinate system for a ((canvas)). +You can rotate subsequently drawn shapes with the `rotate` method and move them with the +`translate` method. The interesting—and confusing—thing is that these +transformations _stack_, meaning that each one happens relative to the +previous transformations. 
+ +{{index "rotate method", "translate method"}} + +So if we translate by +10 horizontal pixels twice, everything will be drawn 20 pixels to the +right. If we first move the center of the coordinate system to (50,50) +and then rotate by 20 ((degree))s (0.1π in ((radian))s), that rotation +will happen _around_ point (50,50). + +{{figure {url: "img/transform.svg", alt: "Stacking transformations",width: "9cm"}}} + +{{index coordinates}} + +But if we _first_ rotate by 20 degrees and _then_ +translate by (50,50), the translation will happen in the rotated +coordinate system and thus produce a different orientation. The order +in which transformations are applied matters. + +{{index axis, mirroring}} + +To flip a picture around the vertical line at a given x +position, we can do the following: + +// include_code + +``` +function flipHorizontally(context, around) { + context.translate(around, 0); + context.scale(-1, 1); + context.translate(-around, 0); +} +``` + +{{index "flipHorizontally method"}} + +We move the y-((axis)) to where we +want our ((mirror)) to be, apply the mirroring, and finally move +the y-axis back to its proper place in the mirrored universe. The +following picture explains why this works: + +{{figure {url: "img/mirror.svg", alt: "Mirroring around a vertical line",width: "8cm"}}} + +{{index "translate method", "scale method", transformation, canvas}} + +This shows the coordinate +systems before and after mirroring across the central line. If we draw a +triangle at a positive x position, it would, by default, be in the +place where triangle 1 is. A call to `flipHorizontally` first does a +translation to the right, which gets us to triangle 2. It then scales, +flipping the triangle back to position 3. This is not where it should +be, if it were mirrored in the given line. The second `translate` call +fixes this—it “cancels” the initial translation and makes triangle 4 +appear exactly where it should. 
+ +We can now draw a mirrored character at position (100,0) by flipping +the world around the character's vertical center. + +```text/html + + +``` + +## Storing and clearing transformations + +{{index "side effect", canvas, transformation}} + +Transformations stick +around. Everything else we draw after ((drawing)) that mirrored +character would also be mirrored. That might be a problem. + +It is possible to save the current transformation, do some drawing and +transforming, and then restore the old transformation. This is usually +the proper thing to do for a function that needs to temporarily +transform the coordinate system. First, we save whatever transformation the code that +called the function was using. Then, the function does its thing (on top of the +existing transformation), possibly adding more transformations. And finally, we +revert to the transformation that we started with. + +{{index "save method", "restore method"}} + +The `save` and `restore` methods +on the 2D ((canvas)) context perform this kind of ((transformation)) +management. They conceptually keep a stack of transformation +((state))s. When you call `save`, the current state is pushed onto the +stack, and when you call `restore`, the state on top of the stack is +taken off and used as the context's current transformation. + +{{index "branching recursion", "fractal example", recursion}} + +The `branch` function in the following example +illustrates what you can do with a function that changes the +transformation and then calls another function (in this case itself), +which continues drawing with the given transformation. + +This function draws a treelike shape by drawing a line, +moving the center of the coordinate system to the end of the line, and calling +itself twice—first rotated to the left and then rotated to the +right. Every call reduces the length of the branch drawn, and the +recursion stops when the length drops below 8. 
+ +```text/html + + +``` + +{{if book + +The result is a simple fractal. + +{{figure {url: "img/canvas_tree.png", alt: "A recursive picture",width: "5cm"}}} + +if}} + +{{index "save method", "restore method", canvas, "rotate method"}} + +If +the calls to `save` and `restore` were not there, the second recursive +call to `branch` would end up with the position and rotation created +by the first call. It wouldn't be connected to the current branch but +rather to the innermost, rightmost branch drawn by the first call. The +resulting shape might also be interesting, but it is definitely not a +tree. + +{{id canvasdisplay}} +## Back to the game + +{{index "drawImage method"}} + +We now know enough about ((canvas)) drawing to +start working on a ((canvas))-based ((display)) system for the +((game)) from the [previous chapter](15_game.html#game). The new +display will no longer be showing just colored boxes. Instead, we'll +use `drawImage` to draw pictures that represent the game's elements. + +{{index "CanvasDisplay type", "DOMDisplay type"}} + +We will define an object +type `CanvasDisplay`, supporting the same ((interface)) as +`DOMDisplay` from [Chapter 15](15_game.html#domdisplay), namely, the +methods `drawFrame` and `clear`. + +{{index state}} + +This object keeps a little more information than +`DOMDisplay`. Rather than using the scroll position of its DOM +element, it tracks its own ((viewport)), which tells us what part of +the level we are currently looking at. It also tracks ((time)) and +uses that to decide which ((animation)) ((frame)) to use. And finally, +it keeps a `flipPlayer` property so that even when the player is +standing still, it keeps facing the direction it last moved in. 
+ +// include_code + +```sandbox-game +function CanvasDisplay(parent, level) { + this.canvas = document.createElement("canvas"); + this.canvas.width = Math.min(600, level.width * scale); + this.canvas.height = Math.min(450, level.height * scale); + parent.appendChild(this.canvas); + this.cx = this.canvas.getContext("2d"); + + this.level = level; + this.animationTime = 0; + this.flipPlayer = false; + + this.viewport = { + left: 0, + top: 0, + width: this.canvas.width / scale, + height: this.canvas.height / scale + }; + + this.drawFrame(0); +} + +CanvasDisplay.prototype.clear = function() { + this.canvas.parentNode.removeChild(this.canvas); +}; +``` + +{{index "CanvasDisplay type"}} + +The `animationTime` counter is the reason we +passed the step size to `drawFrame` in +[Chapter 15](15_game.html#domdisplay), even though `DOMDisplay` +does not use it. Our new `drawFrame` function uses the counter to track time +so that it can switch between ((animation)) ((frame))s based on the +current time. + +// include_code + +```sandbox-game +CanvasDisplay.prototype.drawFrame = function(step) { + this.animationTime += step; + + this.updateViewport(); + this.clearDisplay(); + this.drawBackground(); + this.drawActors(); +}; +``` + +{{index scrolling}} + +Other than tracking time, the method updates the +((viewport)) for the current player position, fills the whole canvas +with a background color, and draws the ((background)) and ((actor))s +onto that. Note that this is different from the approach in +[Chapter 15](15_game.html#domdisplay), where we drew the background +once and scrolled the wrapping DOM element to move it. + +{{index clearing}} + +Because shapes on a canvas are just ((pixel))s, after we +draw them, there is no way to move them (or remove them). The only way +to update the canvas display is to clear it and redraw the scene. + +{{index "CanvasDisplay type"}} + +The `updateViewport` method is similar to +`DOMDisplay`'s `scrollPlayerIntoView` method. 
It checks whether the +player is too close to the edge of the screen and moves the +((viewport)) when this is the case. + +// include_code + +```sandbox-game +CanvasDisplay.prototype.updateViewport = function() { + var view = this.viewport, margin = view.width / 3; + var player = this.level.player; + var center = player.pos.plus(player.size.times(0.5)); + + if (center.x < view.left + margin) + view.left = Math.max(center.x - margin, 0); + else if (center.x > view.left + view.width - margin) + view.left = Math.min(center.x + margin - view.width, + this.level.width - view.width); + if (center.y < view.top + margin) + view.top = Math.max(center.y - margin, 0); + else if (center.y > view.top + view.height - margin) + view.top = Math.min(center.y + margin - view.height, + this.level.height - view.height); +}; +``` + +{{index boundary, "Math.max function", "Math.min function", clipping}} + +The calls +to `Math.max` and `Math.min` ensure that the viewport does +not end up showing space outside of the level. `Math.max(x, 0)` +ensures that the resulting number is not less than zero. +`Math.min`, similarly, ensures a value stays below a given bound. + +When ((clearing)) the display, we'll use a slightly different +((color)) depending on whether the game is won (brighter) or lost +(darker). + +// include_code + +```sandbox-game +CanvasDisplay.prototype.clearDisplay = function() { + if (this.level.status == "won") + this.cx.fillStyle = "rgb(68, 191, 255)"; + else if (this.level.status == "lost") + this.cx.fillStyle = "rgb(44, 136, 214)"; + else + this.cx.fillStyle = "rgb(52, 166, 251)"; + this.cx.fillRect(0, 0, + this.canvas.width, this.canvas.height); +}; +``` + +{{index "Math.floor function", "Math.ceil function", rounding}} + +To draw the +background, we run through the tiles that are visible in the current +viewport, using the same trick used in `obstacleAt` in the +[previous chapter](15_game.html#viewport). 
+ +// include_code + +```sandbox-game +var otherSprites = document.createElement("img"); +otherSprites.src = "img/sprites.png"; + +CanvasDisplay.prototype.drawBackground = function() { + var view = this.viewport; + var xStart = Math.floor(view.left); + var xEnd = Math.ceil(view.left + view.width); + var yStart = Math.floor(view.top); + var yEnd = Math.ceil(view.top + view.height); + + for (var y = yStart; y < yEnd; y++) { + for (var x = xStart; x < xEnd; x++) { + var tile = this.level.grid[y][x]; + if (tile == null) continue; + var screenX = (x - view.left) * scale; + var screenY = (y - view.top) * scale; + var tileX = tile == "lava" ? scale : 0; + this.cx.drawImage(otherSprites, + tileX, 0, scale, scale, + screenX, screenY, scale, scale); + } + } +}; +``` + +{{index "drawImage method", sprite, tile}} + +Tiles that are not empty (null) +are drawn with `drawImage`. The `otherSprites` image contains the +pictures used for elements other than the player. It contains, from +left to right, the wall tile, the lava tile, and the sprite for a +coin. + +{{figure {url: "img/sprites_big.png", alt: "Sprites for our game",width: "1.4cm"}}} + +{{index scaling}} + +Background tiles are 20 by 20 pixels, since we will use +the same scale that we used in `DOMDisplay`. Thus, the offset for lava +tiles is 20 (the value of the `scale` variable), and the offset for +walls is 0. + +{{index drawing, "load event", "drawImage method"}} + +We don't bother +waiting for the sprite image to load. Calling +`drawImage` with an image that hasn't been loaded yet will simply do +nothing. Thus, we might fail to draw the game properly for the first +few ((frame))s, while the image is still loading, but that is not a +serious problem. Since we keep updating the screen, the correct scene +will appear as soon as the loading finishes. + +{{index "player character", animation, drawing}} + +The ((walking)) +character shown earlier will be used to represent the player. 
The +code that draws it needs to pick the right ((sprite)) and direction +based on the player's current motion. The first eight sprites contain a +walking animation. When the player is moving along a floor, we cycle +through them based on the display's `animationTime` property. This is +measured in seconds, and we want to switch frames 12 times per +second, so the ((time)) is multiplied by 12 first. When the player is +standing still, we draw the ninth sprite. During jumps, which are +recognized by the fact that the vertical speed is not zero, we use the +tenth, rightmost sprite. + +{{index "flipHorizontally function", "CanvasDisplay type"}} + +Because the +((sprite))s are slightly wider than the player object—24 instead of 16 +pixels, to allow some space for feet and arms—the method has to adjust +the x-coordinate and width by a given amount (`playerXOverlap`). + +// include_code + +```sandbox-game +var playerSprites = document.createElement("img"); +playerSprites.src = "img/player.png"; +var playerXOverlap = 4; + +CanvasDisplay.prototype.drawPlayer = function(x, y, width, + height) { + var sprite = 8, player = this.level.player; + width += playerXOverlap * 2; + x -= playerXOverlap; + if (player.speed.x != 0) + this.flipPlayer = player.speed.x < 0; + + if (player.speed.y != 0) + sprite = 9; + else if (player.speed.x != 0) + sprite = Math.floor(this.animationTime * 12) % 8; + + this.cx.save(); + if (this.flipPlayer) + flipHorizontally(this.cx, x + width / 2); + + this.cx.drawImage(playerSprites, + sprite * width, 0, width, height, + x, y, width, height); + + this.cx.restore(); +}; +``` + +The `drawPlayer` method is called by `drawActors`, which is responsible for +drawing all the actors in the game. 
+ +// include_code + +```sandbox-game +CanvasDisplay.prototype.drawActors = function() { + this.level.actors.forEach(function(actor) { + var width = actor.size.x * scale; + var height = actor.size.y * scale; + var x = (actor.pos.x - this.viewport.left) * scale; + var y = (actor.pos.y - this.viewport.top) * scale; + if (actor.type == "player") { + this.drawPlayer(x, y, width, height); + } else { + var tileX = (actor.type == "coin" ? 2 : 1) * scale; + this.cx.drawImage(otherSprites, + tileX, 0, width, height, + x, y, width, height); + } + }, this); +}; +``` + +When ((drawing)) something that is not the ((player)), we look at its +type to find the offset of the correct sprite. The ((lava)) tile is +found at offset 20, and the ((coin)) sprite is found at 40 (two times `scale`). + +{{index viewport}} + +We have to subtract the viewport's position when +computing the actor's position since (0,0) on our ((canvas)) +corresponds to the top left of the viewport, not the top left of the +level. We could also have used `translate` for this. Either way works. + +{{if interactive + +{{index "GAME_LEVELS data set", [game, "with canvas"]}} + +The tiny document +shown next plugs the new display into `runGame`: + +{{startCode}} + +[sandbox="game"] +[focus="yes"] +```text/html + + + +``` + +if}} + +{{if book + +{{index [game, screenshot]}} + +That concludes the new ((display)) system. The +resulting game looks something like this: + +{{figure {url: "img/canvas_game.png", alt: "The game as shown on canvas",width: "8cm"}}} + +if}} + +{{id graphics_tradeoffs}} +## Choosing a graphics interface + +Whenever you need to generate graphics in the browser, you can choose +between plain ((HTML)), ((SVG)), and ((canvas)). There is no single +_best_ approach that works in all situations. Each option has +strengths and weaknesses. + +{{index "text wrapping"}} + +Plain HTML has the advantage of being simple. It +also integrates well with ((text)). 
Both SVG and canvas allow you to +draw text, but they won't help you position that text or wrap it +when it takes up more than one line. In an HTML-based picture, it is +easy to include blocks of text. + +{{index zooming, SVG}} + +SVG can be used to produce ((crisp)) ((graphics)) +that look good at any zoom level. It is more difficult to use than +plain HTML but also much more powerful. + +{{index DOM, SVG, "event handling"}} + +Both SVG and HTML build up a +((data structure)) (the DOM) that represents the picture. This makes +it possible to modify elements after they are drawn. If you need to +repeatedly change a small part of a big ((picture)) in response to +what the user is doing or as part of an ((animation)), doing it in a +canvas can be needlessly expensive. The DOM also allows us to register +mouse event handlers on every element in the picture (even on shapes +drawn with SVG). You can't do that with canvas. + +{{index performance, optimization}} + +But ((canvas))’s ((pixel))-oriented +approach can be an advantage when drawing a huge amount of tiny +elements. The fact that it does not build up a data structure but +only repeatedly draws onto the same pixel surface gives canvas a +lower cost per shape. + +{{index "ray tracer"}} + +There are also effects, such as rendering a scene one +pixel at a time (for example, using a ray tracer) or postprocessing +an image with JavaScript (blurring or distorting it), that can only be +realistically handled by a ((pixel))-based technique. + +In some cases, you may want to combine several of these +techniques. For example, you might draw a ((graph)) with ((SVG)) or +((canvas)) but show ((text))ual information by positioning an +((HTML)) element on top of the picture. + +{{index display}} + +For nondemanding applications, it really doesn't matter +much which interface you choose. 
The +[second display](16_canvas.html#canvasdisplay) we built for our +game in this chapter could have been implemented using any of these +three ((graphics)) technologies since it does not need to draw text, +handle mouse interaction, or work with an extraordinarily large number +of elements. + +## Summary + +In this chapter, we discussed techniques for drawing graphics in the +browser, focusing on the `<canvas>` element. + +A canvas node represents an area in a document that our program may +draw on. This drawing is done through a drawing context object, +created with the `getContext` method. + +The 2D drawing interface allows us to fill and stroke various shapes. +The context's `fillStyle` property determines how shapes are filled. The +`strokeStyle` and `lineWidth` properties control the way lines are drawn. + +Rectangles and pieces of text can be drawn with a single method call. +The `fillRect` and `strokeRect` methods draw rectangles, and the +`fillText` and `strokeText` methods draw text. To create custom shapes, +we must first build up a path. + +{{index stroking, filling}} + +Calling `beginPath` starts a new path. A +number of other methods add lines and curves to the current path. For +example, `lineTo` can add a straight line. When a path is +finished, it can be filled with the `fill` method or stroked with the +`stroke` method. + +Moving pixels from an image or another canvas onto our canvas is done +with the `drawImage` method. By default, this method draws the whole +source image, but by giving it more parameters, you can copy +a specific area of the image. We used this for our game by copying individual +poses of the game character out of an image that contained many +such poses. + +Transformations allow you to draw a shape in multiple orientations. +A 2D drawing context has a current transformation that can be changed +with the `translate`, `scale`, and `rotate` methods. These will affect +all subsequent drawing operations. 
A transformation state can be saved +with the `save` method and restored with the `restore` method. + +When drawing an animation on a canvas, the `clearRect` method can be +used to clear part of the canvas before redrawing it. + +## Exercises + +### Shapes + +{{index "shapes (exercise)"}} + +Write a program that draws the following +((shape))s on a ((canvas)): + +1. A ((trapezoid)) (a ((rectangle)) that is wider on one side) + +{{index rotation}} + +2. A red ((diamond)) (a rectangle rotated 45 degrees or ¼π radians) + +3. A zigzagging ((line)) + +4. A ((spiral)) made up of 100 straight line segments + +5. A yellow ((star)) + +{{figure {url: "img/exercise_shapes.png", alt: "The shapes to draw",width: "8cm"}}} + +When drawing the last two, you may want to refer to the +explanation of `Math.cos` and `Math.sin` in +[Chapter 13](13_dom.html#sin_cos), which describes how to get +coordinates on a circle using these functions. + +{{index readability, "hard-coding"}} + +I recommend creating a function for +each shape. Pass the position, and optionally other properties, +such as the size or the number of points, as parameters. The +alternative, which is to hard-code numbers all over your code, tends +to make the code needlessly hard to read and modify. + +{{if interactive + +{{test no}} + +```text/html + + +``` + +if}} + +{{hint + +{{index [path, canvas], "shapes (exercise)"}} + +The ((trapezoid)) (1) is easy to draw using +a path. Pick suitable center coordinates and add each of the four +corners around that. + +{{index "flipHorizontally function", rotation}} + +The ((diamond)) (2) can +be drawn the easy way, with a path, or the interesting way, with a +`rotate` ((transformation)). To use rotation, you will have to apply a +trick similar to what we did in the `flipHorizontally` function. +Because you want to rotate around the center of your rectangle and +not around the point (0,0), you must first `translate` to there, then +rotate, and then translate back. 
+ +{{index "remainder operator", "% operator"}} + +For the ((zigzag)) (3) it +becomes impractical to write a new call to `lineTo` for each line +segment. Instead, you should use a ((loop)). You can have each +iteration draw either two ((line)) segments (right and then left again) or +one, in which case you must use the evenness (`% 2`) of the loop index +to determine whether to go left or right. + +You'll also need a loop for the ((spiral)) (4). If you draw a series +of points, with each point moving further along a circle around the +spiral's center, you get a circle. If, during the loop, you vary the +radius of the circle on which you are putting the current point and +go around more than once, the result is a spiral. + +{{index "quadraticCurveTo method"}} + +The ((star)) (5) depicted is built out of +`quadraticCurveTo` lines. You could also draw one with straight lines. +Divide a circle into eight pieces, or a piece for each point you want your +star to have. Draw lines between these points, making them curve +toward the center of the star. With `quadraticCurveTo`, you can use +the center as the control point. + +hint}} + +{{id exercise_pie_chart}} +### The pie chart + +{{index label, text, "pie chart example"}} + +[Earlier](16_canvas.html#pie_chart) in the chapter, we +saw an example program that drew a pie chart. Modify this program so +that the name of each category is shown next to the slice that +represents it. Try to find a pleasing-looking way to automatically +position this text, which would work for other data sets as well. You +may assume that categories are no smaller than 5 percent (that is, there won't be +a bunch of tiny ones next to each other). + +You might again need `Math.sin` and `Math.cos`, as described in the +previous exercise. 
+ +{{if interactive + +{{test no}} + +```text/html + + +``` + +if}} + +{{hint + +{{index "fillText method", "textAlign property", "textBaseline property", "pie chart example"}} + +You will need to call `fillText` +and set the context's `textAlign` and `textBaseline` properties in +such a way that the text ends up where you want it. + +A sensible way to position the labels would be to put the text on the +line going from the center of the pie through the middle of the slice. +You don't want to put the text directly against the side of the pie +but rather move the text out to the side of the pie by a given number of pixels. + +The ((angle)) of this line is `currentAngle + 0.5 * sliceAngle`. The +following code finds a position on this line, 120 pixels from the center: + +{{test no}} + +``` +var middleAngle = currentAngle + 0.5 * sliceAngle; +var textX = Math.cos(middleAngle) * 120 + centerX; +var textY = Math.sin(middleAngle) * 120 + centerY; +``` + +For `textBaseline`, the value `"middle"` is probably appropriate when +using this approach. What to use for `textAlign` depends on the side +of the circle we are on. On the left, it should be `"right"`, and on +the right, it should be `"left"` so that the text is positioned away +from the pie. + +{{index "Math.cos function"}} + +If you are not sure how to find out which side +of the circle a given angle is on, look to the explanation of +`Math.cos` in the previous exercise. The cosine of an angle tells us +which x-coordinate it corresponds to, which in turn tells us exactly +which side of the circle we are on. + +hint}} + +### A bouncing ball + +{{index animation, "requestAnimationFrame function", bouncing}} + +Use +the `requestAnimationFrame` technique that we saw in +[Chapter 13](13_dom.html#animationFrame) and +[Chapter 15](15_game.html#runAnimation) to draw a ((box)) with a +bouncing ((ball)) in it. The ball moves at a constant +((speed)) and bounces off the box's sides when it hits them. 
+ +{{if interactive + +{{test no}} + +```text/html + + +``` + +if}} + +{{hint + +{{index "strokeRect method", animation, "arc method"}} + +A ((box)) is +easy to draw with `strokeRect`. Define a variable that holds its size +or define two variables if your box's width and height differ. To create a +round ((ball)), start a path, call _arc(x, y, radius, 0, 7)_, which creates an arc +going from zero to more than a whole circle, and fill it. + +{{index "collision detection", "Vector type"}} + +To model the ball's position +and ((speed)), you can use the `Vector` type from +[Chapter 15](15_game.html#vector)(!interactive (which is available on this +page)!). Give it a starting speed, preferably one that is not purely +vertical or horizontal, and every ((frame)), multiply that speed with +the amount of time that elapsed. When the ball gets too close to a +vertical wall, invert the x component in its speed. Likewise, invert +the y component when it hits a horizontal wall. + +{{index "clearRect method", clearing}} + +After finding the ball's new +position and speed, use `clearRect` to delete the scene and redraw it +using the new position. + +hint}} + +### Precomputed mirroring + +{{index optimization, "bitmap graphics", mirror}} + +One unfortunate +thing about ((transformation))s is that they slow down drawing of +bitmaps. For vector graphics, the effect is less serious since +only a few points (for example, the center of a circle) need to be +transformed, after which drawing can happen as normal. For a bitmap +image, the position of each ((pixel)) has to be transformed, and +though it is possible that ((browser))s will get more clever about +this in the ((future)), this currently causes a measurable increase in +the time it takes to draw a bitmap. + +In a game like ours, where we are drawing only a single transformed +sprite, this is a nonissue. But imagine that we need to draw hundreds +of characters or thousands of rotating particles from an explosion. 
+ +Think of a way to allow us to draw an inverted character without +loading additional image files and without having to make transformed +`drawImage` calls every frame. + +{{hint + +{{index mirror, scaling, "drawImage method"}} + +The key to the solution +is the fact that we can use a ((canvas)) element as a source image +when using `drawImage`. It is possible to create an extra `<canvas>` +element, without adding it to the document, and draw our inverted +sprites to it, once. When drawing an actual frame, we just copy the +already inverted sprites to the main canvas. + +{{index "load event"}} + +Some care would be required because images do not load +instantly. We do the inverted drawing only once, and if we do it +before the image loads, it won't draw anything. A `"load"` handler on +the image can be used to draw the inverted images to the extra canvas. +This canvas can be used as a drawing source immediately (it'll simply +be blank until we draw the character onto it). + +hint}} + diff --git a/17_http.md b/17_http.md new file mode 100644 index 000000000..b43b6b806 --- /dev/null +++ b/17_http.md @@ -0,0 +1,1088 @@ +{{meta {chap_num: 17, prev_link: 16_canvas, next_link: 18_forms, load_files: ["code/chapter/17_http.js", "code/promise.js"]}}} + +# HTTP + +{{quote {author: "Tim Berners-Lee,The World Wide Web: A very short personal history", chapter: true} + +The dream behind the Web is of a common information space in which we +communicate by sharing information. Its universality is essential: the +fact that a hypertext link can point to anything, be it personal, +local or global, be it draft or highly polished. + +quote}} + +{{index "Berners-Lee, Tim", "World Wide Web", HTTP}} + +The +_Hypertext Transfer Protocol_, already mentioned in +[Chapter 12](12_browser.html#web), is the mechanism through which +data is requested and provided on the ((World Wide Web)). 
This chapter +describes the ((protocol)) in more detail and explains the way ((browser)) +JavaScript has access to it. + +## The protocol + +{{index "IP address"}} + +If you type _eloquentjavascript.net/17_http.html_ into +your browser's ((address bar)), the ((browser)) first looks up the +((address)) of the server associated with _eloquentjavascript.net_ +and tries to open a ((TCP)) ((connection)) to it on ((port)) 80, the +default port for ((HTTP)) traffic. If the ((server)) exists and +accepts the connection, the browser sends something like this: + +```http +GET /17_http.html HTTP/1.1 +Host: eloquentjavascript.net +User-Agent: Your browser's name +``` + +Then the server responds, through that same connection. + +```http +HTTP/1.1 200 OK +Content-Length: 65585 +Content-Type: text/html +Last-Modified: Wed, 09 Apr 2014 10:48:09 GMT + + +... the rest of the document +``` + +The browser then takes the part of the ((response)) after the blank +line and displays it as an ((HTML)) document. + +{{index HTTP}} + +The information sent by the client is called the +_((request))_. It starts with this line: + +```http +GET /17_http.html HTTP/1.1 +``` + +{{index "DELETE method", "PUT method", "GET method"}} + +The first word is +the _((method))_ of the ((request)). `GET` means that we want to _get_ +the specified resource. Other common methods are `DELETE` to delete a +resource, `PUT` to replace it, and `POST` to send information to it. +Note that the ((server)) is not obliged to carry out every request it +gets. If you walk up to a random website and tell it to `DELETE` its +main page, it'll probably refuse. + +{{index [path, URL], Twitter}} + +The part after the ((method)) name is the path of the +((resource)) the request applies to. In the simplest case, a resource +is simply a ((file)) on the ((server)), but the protocol doesn't +require it to be. A resource may be anything that can be transferred _as if_ +it is a file. 
Many servers generate the responses they produce on the +fly. For example, if you open +[_twitter.com/marijnjh_](http://twitter.com/marijnjh), the server looks +in its database for a user named _marijnjh_, and if it finds one, it +will generate a profile page for that user. + +After the resource path, the first line of the request mentions +`HTTP/1.1` to indicate the ((version)) of the ((HTTP)) ((protocol)) +it is using. + +{{index "status code"}} + +The server's ((response)) will start with a version +as well, followed by the status of the response, first as a +three-digit status code and then as a human-readable string. + +```http +HTTP/1.1 200 OK +``` + +{{index "200 (HTTP status code)", "error response", "404 (HTTP status code)"}} + +Status codes starting with a 2 indicate that the request succeeded. +Codes starting with 4 mean there was something wrong with the +((request)). 404 is probably the most famous HTTP status code—it means +that the resource that was requested could not be found. Codes that +start with 5 mean an error happened on the ((server)) and the request +is not to blame. + +{{index HTTP}} + +{{id headers}} +The first line of a request or response may be followed by +any number of _((header))s_. These are lines in the form “name: value” +that specify extra information about the request or response. These +headers were part of the example ((response)): + +```null +Content-Length: 65585 +Content-Type: text/html +Last-Modified: Wed, 09 Apr 2014 10:48:09 GMT +``` + +{{index "Content-Length header", "Content-Type header", "Last-Modified header"}} + +This tells us the size and type of the response document. In +this case, it is an HTML document of 65,585 bytes. It also tells us when +that document was last modified. + +{{index "Host header", domain}} + +For the most part, a client or server +decides which ((header))s to include in a ((request)) or ((response)), +though a few headers are required. 
For example, the `Host` header, +which specifies the hostname, should be included in a request +because a ((server)) might be serving multiple hostnames on a single +((IP address)), and without that header, the server won't know which host the +client is trying to talk to. + +{{index "GET method", "DELETE method", "PUT method", "POST method", "body (HTTP)"}} + +After the headers, both requests and +responses may include a blank line followed by a _body_, which +contains the data being sent. `GET` and `DELETE` requests don't send +along any data, but `PUT` and `POST` requests do. +Similarly, some response types, such as error responses, do not +require a body. + +## Browsers and HTTP + +{{index HTTP}} + +As we saw in the example, a ((browser)) will make a request +when we enter a ((URL)) in its ((address bar)). When the resulting +HTML page references other files, such as ((image))s and JavaScript +((file))s, those are also fetched. + +{{index parallelism, "GET method"}} + +A moderately complicated ((website)) can easily +include anywhere from 10 to 200 ((resource))s. To be able to +fetch those quickly, browsers will make several requests +simultaneously, rather than waiting for the responses one at a time. +Such documents are always fetched using `GET` +((request))s. + +{{id http_forms}} +HTML pages may include _((form))s_, which allow +the user to fill out information and send it to the server. This is an +example of a form: + +```text/html +
<form method="GET" action="example/message.html"> + <p>Name: <input type="text" name="name"></p> + <p>Message:<br><textarea name="message"></textarea></p> + <p><button type="submit">Send</button></p> +</form> +``` + +{{index form, "method attribute", "GET method"}} + +This code describes a form with two +((field))s: a small one asking for a name and a larger one to write a +message in. When you click the Send ((button)), the information in +those fields will be encoded into a _((query string))_. When the +`<form>` element's `method` attribute is `GET` (or is omitted), that +query string is tacked onto the `action` URL, and the browser makes a +`GET` request to that URL. + +```text/html +GET /example/message.html?name=Jean&message=Yes%3F HTTP/1.1 +``` + +{{index "ampersand character"}} + +The start of a ((query string)) is indicated +by a ((question mark)). After that follow pairs of names and values, +corresponding to the `name` attribute on the form field elements and +the content of those elements, respectively. An ampersand character (`&`) is used to separate +the pairs. + +{{index [escaping, "in URLs"], "hexadecimal number", "percent sign", "URL encoding", "encodeURIComponent function", "decodeURIComponent function"}} + +The actual message encoded +in the previous URL is “Yes?”, even though the question mark is replaced +by a strange code. Some characters in query strings must be +escaped. The question mark, represented as `%3F`, is one of those. +There seems to be an unwritten rule that every format needs its +own way of escaping characters. This one, called _URL +encoding_, uses a percent sign followed by two hexadecimal digits +that encode the character code. In this case, 3F, which is 63 in +decimal notation, is the code of a question mark character. JavaScript +provides the `encodeURIComponent` and `decodeURIComponent` functions +to encode and decode this format. + +``` +console.log(encodeURIComponent("Hello & goodbye")); +// → Hello%20%26%20goodbye +console.log(decodeURIComponent("Hello%20%26%20goodbye")); +// → Hello & goodbye +``` + +{{index "body (HTTP)", "POST method"}} + +If we change the `method` attribute +of the HTML form in the example we saw earlier to `POST`, the ((HTTP)) request made to submit the +((form)) will use the `POST` method and put the ((query string)) in +the body of the request, rather than adding it to the URL. 
+ +```http +POST /example/message.html HTTP/1.1 +Content-length: 24 +Content-type: application/x-www-form-urlencoded + +name=Jean&message=Yes%3F +``` + +By convention, the `GET` method is used for requests that do not have +side effects, such as doing a search. Requests that change something on +the server, such as creating a new account or posting a message, should +be expressed with other methods, such as `POST`. Client-side software, +such as a browser, knows that it shouldn't blindly make `POST` +requests but will often implicitly make `GET` requests—for example, to +prefetch a resource it believes the user will soon need. + +The [next chapter](18_forms.html#forms) will return to forms +and talk about how we can script them with JavaScript. + +{{id xmlhttprequest}} +## XMLHttpRequest + +{{index capitalization, XMLHttpRequest}} + +The ((interface)) through +which browser JavaScript can make HTTP requests is called +`XMLHttpRequest` (note the inconsistent capitalization). It was +designed by ((Microsoft)), for its ((Internet Explorer)) +((browser)), in the late 1990s. During this time, the ((XML)) file format +was _very_ popular in the world of ((business software))—a world where +Microsoft has always been at home. In fact, it was so popular that the +acronym XML was tacked onto the front of the name of an interface for +((HTTP)), which is in no way tied to XML. + +{{index modularity, [interface, design]}} + +The name isn't completely +nonsensical, though. The interface allows you to parse response documents as +XML if you want. Conflating two distinct concepts (making a request +and ((parsing)) the response) into a single thing is terrible design, +of course, but so it goes. + +When the `XMLHttpRequest` interface was added to Internet Explorer, it +allowed people to do things with JavaScript that had been very hard +before. For example, websites started showing lists of suggestions +when the user was typing something into a text field. 
The script would +send the text to the server over ((HTTP)) as the user typed. The ((server)), +which had some ((database)) of possible inputs, would +match the database entries against the partial input and send back possible +((completion))s to show the user. This was +considered spectacular—people were used to waiting for a full page reload +for every interaction with a website. + +{{index compatibility, Firefox, XMLHttpRequest}} + +The other +significant browser at that time, ((Mozilla)) (later Firefox), did not +want to be left behind. To allow people to do similarly neat things in +_its_ browser, Mozilla copied the interface, including the bogus name. +The next generation of ((browser))s followed this example, and today +`XMLHttpRequest` is a de facto standard ((interface)). + +## Sending a request + +{{index "open method", "send method", XMLHttpRequest}} + +To make a simple +((request)), we create a request object with the `XMLHttpRequest` +constructor and call its `open` and `send` methods. + +{{test trim}} + +``` +var req = new XMLHttpRequest(); +req.open("GET", "example/data.txt", false); +req.send(null); +console.log(req.responseText); +// → This is the content of data.txt +``` + +{{index [path, URL], "open method", "relative URL", "slash character"}} + +The `open` +method configures the request. In this case, we choose to make a `GET` +request for the _example/data.txt_ file. ((URL))s that don't start +with a protocol name (such as _http:_) are relative, which means that +they are interpreted relative to the current document. When they start +with a slash (/), they replace the current path, which is the part after the +server name. When they do not, the part of the current path up to +and including its last slash character is put in front of the relative +URL. + +{{index "send method", "GET method", "body (HTTP)", "responseText property"}} + +After opening the request, we can send it with the `send` +method. The argument to send is the request body. 
For `GET` requests, +we can pass `null`. If the third argument to `open` was `false`, `send` +will return only after the response to our request was received. We +can read the request object's `responseText` property to get the +response body. + +{{index "status property", "statusText property", header, "getResponseHeader method"}} + +The other +information included in the response can also be extracted from this +object. The ((status code)) is accessible through the `status` +property, and the human-readable status text is accessible through `statusText`. +Headers can be read with `getResponseHeader`. + +{{test no}} + +``` +var req = new XMLHttpRequest(); +req.open("GET", "example/data.txt", false); +req.send(null); +console.log(req.status, req.statusText); +// → 200 OK +console.log(req.getResponseHeader("content-type")); +// → text/plain +``` + +{{index "case sensitivity", capitalization}} + +Header names are +case-insensitive. They are usually written with a capital letter at +the start of each word, such as “Content-Type”, but “content-type” and +“cOnTeNt-TyPe” refer to the same header. + +{{index "Host header", "setRequestHeader method"}} + +The browser will +automatically add some request ((header))s, such as “Host” and those +needed for the server to figure out the size of the body. But you can +add your own headers with the `setRequestHeader` method. This is +needed only for advanced uses and requires the cooperation of the +((server)) you are talking to—a server is free to ignore headers it +does not know how to handle. + +## Asynchronous Requests + +{{index XMLHttpRequest, "event handling", blocking, "synchronous I/O", "responseText property", "send method"}} + +In the examples we +saw, the request has finished when the call to `send` returns. This is +convenient because it means properties such as `responseText` are +available immediately. But it also means that our program is suspended +as long as the ((browser)) and server are communicating. 
When the +((connection)) is bad, the server is slow, or the file is big, that +might take quite a while. Worse, because no event handlers can fire +while our program is suspended, the whole document will become +unresponsive. + +{{index XMLHttpRequest, "open method", "asynchronous I/O"}} + +If we pass +`true` as the third argument to `open`, the request is _asynchronous_. +This means that when we call `send`, the only thing that happens right +away is that the request is scheduled to be sent. Our program can +continue, and the browser will take care of the sending and receiving +of data in the background. + +But as long as the request is running, we won't be able to access the +response. We need a mechanism that will notify us when the data is +available. + +{{index "event handling", "load event"}} + +For this, we must listen for the +`"load"` event on the request object. + +``` +var req = new XMLHttpRequest(); +req.open("GET", "example/data.txt", true); +req.addEventListener("load", function() { + console.log("Done:", req.status); +}); +req.send(null); +``` + +{{index "asynchronous programming", "callback function"}} + +Just like the use +of `requestAnimationFrame` in [Chapter 15](15_game.html#game), this +forces us to use an asynchronous style of programming, wrapping the +things that have to be done after the request in a function and +arranging for that to be called at the appropriate time. We will come +back to this [later](17_http.html#promises). + +## Fetching XML Data + +{{index "documentElement property", "responseXML property"}} + +When the +resource retrieved by an `XMLHttpRequest` object is an ((XML)) +document, the object's `responseXML` property will hold a parsed +representation of this document. This representation works much like +the ((DOM)) discussed in [Chapter 13](13_dom.html#dom), except that +it doesn't have HTML-specific functionality like the `style` property. +The object that `responseXML` holds corresponds to the `document` +object. 
Its `documentElement` property refers to the outer tag of the +XML document. In the following document (_example/fruit.xml_), that +would be the `<fruits>` tag: + +```application/xml +<fruits> + <fruit name="banana" color="yellow"/> + <fruit name="lemon" color="yellow"/> + <fruit name="cherry" color="red"/> +</fruits> +``` + +We can retrieve such a file like this: + +{{test no}} + +``` +var req = new XMLHttpRequest(); +req.open("GET", "example/fruit.xml", false); +req.send(null); +console.log(req.responseXML.querySelectorAll("fruit").length); +// → 3 +``` + +{{index "data format"}} + +XML documents can be used to exchange structured +information with the server. Their form—tags nested inside other +tags—lends itself well to storing most types of data, or at least +better than flat text files. The DOM interface is rather clumsy for +extracting information, though, and ((XML)) documents tend to be +verbose. It is often a better idea to communicate using ((JSON)) data, +which is easier to read and write, both for programs and for humans. + +``` +var req = new XMLHttpRequest(); +req.open("GET", "example/fruit.json", false); +req.send(null); +console.log(JSON.parse(req.responseText)); +// → {banana: "yellow", lemon: "yellow", cherry: "red"} +``` + +{{id http_sandbox}} +## HTTP sandboxing + +{{index sandbox}} + +Making ((HTTP)) requests in web page scripts once +again raises concerns about ((security)). The person who controls the +script might not have the same interests as the person on whose +computer it is running. More specifically, if I visit _themafia.org_, +I do not want its scripts to be able to make a request to +_mybank.com_, using identifying information from my ((browser)), with +instructions to transfer all my money to some random ((mafia)) +account. + +It is possible for ((website))s to protect themselves against such +((attack))s, but that requires effort, and many websites fail to do it. +For this reason, browsers protect us by disallowing scripts to make +HTTP requests to other _((domain))s_ (names such as _themafia.org_ and +_mybank.com_). 
+ +{{index "Access-Control-Allow-Origin header", "cross-domain request"}} + +This +can be an annoying problem when building systems that want to access +several domains for legitimate reasons. Fortunately, ((server))s can +include a ((header)) like this in their ((response)) to explicitly +indicate to browsers that it is okay for the request to come from +other domains: + +```null +Access-Control-Allow-Origin: * +``` + +## Abstracting requests + +{{index HTTP, XMLHttpRequest, "backgroundReadFile function"}} + +In +[Chapter 10](10_modules.html#amd), in our implementation of the AMD +module system, we used a hypothetical function called +`backgroundReadFile`. It took a filename and a function and called +that function with the contents of the file when it had finished +fetching it. Here's a simple implementation of that function: + +// include_code + +``` +function backgroundReadFile(url, callback) { + var req = new XMLHttpRequest(); + req.open("GET", url, true); + req.addEventListener("load", function() { + if (req.status < 400) + callback(req.responseText); + }); + req.send(null); +} +``` + +{{index XMLHttpRequest}} + +This simple ((abstraction)) makes it easier to use +`XMLHttpRequest` for simple `GET` requests. If you are writing a +program that has to make HTTP requests, it is a good idea to use a +helper function so that you don't end up repeating the ugly +`XMLHttpRequest` pattern all through your code. + +{{index [function, "as value"], "callback function"}} + +The function argument's +name, `callback`, is a term that is often used to describe functions +like this. A callback function is given to other code to provide that +code with a way to “call us back” later. + +{{index library}} + +It is not hard to write an HTTP utility function, tailored to what your +application is doing. The previous one does only `GET` requests and +doesn't give us control over the headers or the request body. 
You +could write another variant for `POST` requests or a more generic one +that supports various kinds of requests. Many JavaScript libraries +also provide wrappers for `XMLHttpRequest`. + +{{index "user experience", "error response"}} + +The main problem with the previous +wrapper is its handling of ((failure)). When the request returns +a ((status code)) that indicates an error (400 and up), it does +nothing. This might be okay, in some circumstances, but imagine we put +a “loading” indicator on the page to indicate that we are fetching +information. If the request fails because the server crashed or the +((connection)) is briefly interrupted, the page will just sit there, +misleadingly looking like it is doing something. The user will wait +for a while, get impatient, and consider the site uselessly flaky. + +We should also have an option to be notified when the request fails +so that we can take appropriate action. For example, we could remove the +“loading” message and inform the user that something went wrong. + +{{index "exception handling", "callback function", "error handling", "asynchronous programming", "try keyword", stack}} + +Error handling in asynchronous code is even +trickier than error handling in synchronous code. Because we often need +to defer part of our work, putting it in a callback function, the +scope of a `try` block becomes meaningless. In the following code, the +exception will _not_ be caught because the call to +`backgroundReadFile` returns immediately. Control then leaves the +`try` block, and the function it was given won't be called until +later. 
+ +{{test no}} + +``` +try { + backgroundReadFile("example/data.txt", function(text) { + if (text != "expected") + throw new Error("That was unexpected"); + }); +} catch (e) { + console.log("Hello from the catch block"); +} +``` + +{{index HTTP, "getURL function", exception}} + +{{id getURL}} +To handle failing +requests, we have to allow an additional function to be passed to our +wrapper and call that when a request goes wrong. Alternatively, we +can use the convention that if the request fails, an additional +argument describing the problem is passed to the regular callback +function. Here's an example: + +// include_code + +``` +function getURL(url, callback) { + var req = new XMLHttpRequest(); + req.open("GET", url, true); + req.addEventListener("load", function() { + if (req.status < 400) + callback(req.responseText); + else + callback(null, new Error("Request failed: " + + req.statusText)); + }); + req.addEventListener("error", function() { + callback(null, new Error("Network error")); + }); + req.send(null); +} +``` + +{{index "error event"}} + +We have added a handler for the `"error"` event, +which will be signaled when the request fails entirely. We also call +the ((callback function)) with an error argument when the request +completes with a ((status code)) that indicates an error. + +Code using `getURL` must then check whether an error was given and, if +it finds one, handle it. + +``` +getURL("data/nonsense.txt", function(content, error) { + if (error != null) + console.log("Failed to fetch nonsense.txt: " + error); + else + console.log("nonsense.txt: " + content); +}); +``` + +{{index "uncaught exception", "exception handling", "try keyword"}} + +This +does not help when it comes to exceptions. When chaining several +asynchronous actions together, an exception at any point of the chain +will still (unless you wrap each handling function in its own +`try/catch` block) land at the top level and abort your chain of +actions. 
+ +{{id promises}} +## Promises + +{{index promise, "asynchronous programming", "callback function", readability, "uncaught exception"}} + +For complicated +projects, writing asynchronous code in plain callback style is hard to +do correctly. It is easy to forget to check for an error or to allow +an unexpected exception to cut the program short in a crude way. +Additionally, arranging for correct error handling when the error has +to flow through multiple callback functions and `catch` blocks is +tedious. + +{{index future, "ECMAScript 6"}} + +There have been a lot of attempts to +solve this with extra abstractions. One of the more successful ones is +called _promises_. Promises wrap an asynchronous action in an object, +which can be passed around and told to do certain things when the +action finishes or fails. This interface is set to become part of the next +version of the JavaScript language but can already be used as a +library. + +The ((interface)) for promises isn't entirely intuitive, but it is +powerful. This chapter will only roughly describe it. You can find a more thorough +treatment at +[_www.promisejs.org_](https://www.promisejs.org/). + +{{index "Promise constructor"}} + +To create a promise object, we call the +`Promise` constructor, giving it a function that initializes the +asynchronous action. The constructor calls that function, passing it +two arguments, which are themselves functions. The first should be +called when the action finishes successfully, and the second should be called when it +fails. + +{{index HTTP, "get function"}} + +Once again, here is our wrapper for `GET` +requests, this time returning a promise. We'll simply call it `get` +this time. 
+ +// include_code + +``` +function get(url) { + return new Promise(function(succeed, fail) { + var req = new XMLHttpRequest(); + req.open("GET", url, true); + req.addEventListener("load", function() { + if (req.status < 400) + succeed(req.responseText); + else + fail(new Error("Request failed: " + req.statusText)); + }); + req.addEventListener("error", function() { + fail(new Error("Network error")); + }); + req.send(null); + }); +} +``` + +Note that the ((interface)) to the function itself is now a lot +simpler. You give it a URL, and it returns a ((promise)). That promise +acts as a _handle_ to the request's outcome. It has a `then` method +that you can call with two functions: one to handle success and one +to handle failure. + +``` +get("example/data.txt").then(function(text) { + console.log("data.txt: " + text); +}, function(error) { + console.log("Failed to fetch data.txt: " + error); +}); +``` + +{{index chaining}} + +So far, this is just another way to express the same +thing we already expressed. It is only when you need to chain +actions together that promises make a significant difference. + +{{index "then method"}} + +Calling `then` produces a new ((promise)), whose +result (the value passed to success handlers) depends on the return +value of the first function we passed to `then`. This function may +return another promise to indicate that more asynchronous work is +being done. In this case, the promise returned by `then` itself will +wait for the promise returned by the handler function, succeeding or +failing with the same value when it is resolved. When the handler +function returns a nonpromise value, the promise returned by `then` +immediately succeeds with that value as its result. + +{{index "then method", chaining}} + +This means you can use `then` to +transform the result of a promise. 
For example, this returns a promise +whose result is the content of the given URL, parsed as ((JSON)): + +// include_code + +``` +function getJSON(url) { + return get(url).then(JSON.parse); +} +``` + +{{index "error handling"}} + +That last call to `then` did not specify a failure +handler. This is allowed. The error will be passed to the promise +returned by `then`, which is exactly what we want—`getJSON` does not +know what to do when something goes wrong, but hopefully its caller +does. + +As an example that shows the use of ((promise))s, we will build a +program that fetches a number of JSON files from the server and, +while it is doing that, shows the word _loading_. The JSON files +contain information about people, with links to files that represent +other people in properties such as `father`, `mother`, or `spouse`. + +{{index "error message", JSON}} + +We want to get the name of the mother of +the spouse of _example/bert.json_. And if something goes wrong, we +want to remove the _loading_ text and show an error message instead. +Here is how that might be done with ((promise))s: + +```text/html + +``` + +{{index "error handling", "catch method", "then method", readability, "program size"}} + +The resulting program is +relatively compact and readable. The `catch` method is similar to +`then`, except that it only expects a failure handler and will pass +through the result unmodified in case of success. Much like with the +`catch` clause for the `try` statement, control will continue as +normal after the failure is caught. That way, the final `then`, which +removes the loading message, is always executed, even if something +went wrong. + +{{index "asynchronous programming", "domain-specific language"}} + +You can +think of the promise interface as implementing its own language for +asynchronous ((control flow)). 
The extra method calls and function +expressions needed to achieve this make the code look somewhat +awkward but not remotely as awkward as it would look if we took care +of all the error handling ourselves. + +## Appreciating HTTP + +{{index client, HTTP}} + +When building a system that requires +((communication)) between a JavaScript program running in the +((browser)) (client-side) and a program on a ((server)) (server-side), +there are several different ways to model this communication. + +{{index network, abstraction}} + +A commonly used model is that of +_((remote procedure call))s_. In this model, communication follows the +patterns of normal function calls, except that the function is +actually running on another machine. Calling it involves making a +request to the server that includes the function's name and arguments. +The response to that request contains the returned value. + +When thinking in terms of remote procedure calls, HTTP is just a +vehicle for communication, and you will most likely write an +abstraction layer that hides it entirely. + +{{index "media type", "document format"}} + +Another approach is to build your +communication around the concept of ((resource))s and ((HTTP)) +((method))s. Instead of a remote procedure called `addUser`, you use a +`PUT` request to `/users/larry`. Instead of encoding that user's +properties in function arguments, you define a document format or use +an existing format that represents a user. The body of the `PUT` request +to create a new resource is then simply such a document. A resource is +fetched by making a `GET` +request to the resource's URL (for example, `/users/larry`), which +returns the document representing the resource. + +This second approach makes it easier to use some of the features that +HTTP provides, such as support for caching resources (keeping a copy +on the client side). 
It can also help the coherence of your interface +since resources are easier to reason about than a jumble of functions. + +## Security and HTTPS + +{{index "man-in-the-middle", security, HTTPS}} + +Data traveling over +the Internet tends to follow a long, dangerous road. To get +to its destination, it must hop through anything from coffee-shop Wi-Fi +((network))s to networks controlled by various companies and states. +At any point along its route it may be inspected or even modified. + +{{index tampering}} + +If it is important that something remain secret, +such as the ((password)) to your ((email)) account, or that it arrive +at its destination unmodified, such as the account number you transfer +money to from your bank's website, plain HTTP is not good enough. + +{{index cryptography, encryption}} + +{{indexsee "Secure HTTP", HTTPS}} + +The secure ((HTTP)) protocol, whose +((URL))s start with _https://_, wraps HTTP traffic in a way that makes +it harder to read and tamper with. First, the client verifies that the +server is who it claims to be by requiring that server to prove that it has a +cryptographic ((certificate)) issued by a certificate authority that +the ((browser)) recognizes. Next, all data going over the +((connection)) is encrypted in a way that should prevent eavesdropping +and tampering. + +Thus, when it works right, ((HTTPS)) prevents both +someone impersonating the website you were trying to talk to and +someone snooping on your communication. It is not +perfect, and there have been various incidents where HTTPS failed because of +forged or stolen certificates and broken software. Still, plain +HTTP is trivial to mess with, whereas breaking HTTPS requires the kind +of effort that only states or sophisticated criminal organizations can +hope to make. + +## Summary + +In this chapter, we saw that HTTP is a protocol for accessing +resources over the Internet. 
A _client_ sends a request, which +contains a method (usually `GET`) and a path that identifies a +resource. The _server_ then decides what to do with the request and +responds with a status code and a response body. Both requests and +responses may contain headers that provide additional information. + +Browsers make `GET` requests to fetch the resources needed to display +a web page. A web page may also contain forms, which allow information +entered by the user to be sent along in the request made when the form +is submitted. You will learn more about that in the [next +chapter](18_forms.html#forms). + +The interface through which browser JavaScript can make HTTP requests +is called `XMLHttpRequest`. You can usually ignore the “XML” part of +that name (but you still have to type it). There are two ways in which +it can be used—synchronous, which blocks everything until the request +finishes, and asynchronous, which requires an event handler to notice +that the response came in. In almost all cases, asynchronous is +preferable. Making a request looks like this: + +``` +var req = new XMLHttpRequest(); +req.open("GET", "example/data.txt", true); +req.addEventListener("load", function() { + console.log(req.status); +}); +req.send(null); +``` + +Asynchronous programming is tricky. _Promises_ are an interface that +makes it slightly easier by helping route error conditions and +exceptions to the right handler and by abstracting away some of the more +repetitive and error-prone elements in this style of programming. + +## Exercises + +{{id exercise_accept}} +### Content negotiation + +{{index "Accept header", "media type", "document format", "content negotiation (exercise)"}} + +One of the things that HTTP can do, but that +we have not discussed in this chapter, is called _content +negotiation_. The `Accept` header for a request can be used to tell +the server what type of document the client would like to get. 
Many +servers ignore this header, but when a server knows of various ways to +encode a resource, it can look at this header and send the one that +the client prefers. + +{{index "media type", "MIME type"}} + +The URL +[_eloquentjavascript.net/author_](http://eloquentjavascript.net/author) +is configured to respond with either plaintext, HTML, or JSON, +depending on what the client asks for. These formats are identified by +the standardized _media types_ `text/plain`, `text/html`, and +`application/json`. + +{{index "setRequestHeader method", XMLHttpRequest}} + +Send requests to +fetch all three formats of this resource. Use the `setRequestHeader` +method of your `XMLHttpRequest` object to set the header named `Accept` +to one of the media types given earlier. Make sure you set the header +_after_ calling `open` but before calling `send`. + +Finally, try asking for the media type `application/rainbows+unicorns` +and see what happens. + +{{if interactive + +{{test no}} + +``` +// Your code here. +``` + +if}} + +{{hint + +{{index "synchronous I/O", "content negotiation (exercise)"}} + +See the +various examples of using an `XMLHttpRequest` in this chapter for an +example of the method calls involved in making a request. You can use +a synchronous request (by setting the third parameter to `open` to +`false`) if you want. + +{{index "406 (HTTP status code)", "Accept header"}} + +Asking for a bogus +media type will return a response with code 406, “Not acceptable”, +which is the code a server should return when it can't fulfill the +`Accept` header. + +hint}} + +### Waiting for multiple promises + +{{index "all function", "Promise constructor"}} + +The `Promise` constructor +has an `all` method that, given an array of ((promise))s, returns a +promise that waits for all of the promises in the array to finish. It then succeeds, +yielding an array of result values. 
If any of the promises in +the array fail, the promise returned by `all` fails too (with the +failure value from the failing promise). + +Try to implement something like this yourself as a regular function +called `all`. + +Note that after a promise is resolved (has succeeded or failed), it +can't succeed or fail again, and further calls to the functions that +resolve it are ignored. This can simplify the way you handle failure +of your promise. + +{{if interactive + +{{test no}} + +``` +function all(promises) { + return new Promise(function(success, fail) { + // Your code here. + }); +} + +// Test code. +all([]).then(function(array) { + console.log("This should be []:", array); +}); +function soon(val) { + return new Promise(function(success) { + setTimeout(function() { success(val); }, + Math.random() * 500); + }); +} +all([soon(1), soon(2), soon(3)]).then(function(array) { + console.log("This should be [1, 2, 3]:", array); +}); +function fail() { + return new Promise(function(success, fail) { + fail(new Error("boom")); + }); +} +all([soon(1), fail(), soon(3)]).then(function(array) { + console.log("We should not get here"); +}, function(error) { + if (error.message != "boom") + console.log("Unexpected failure:", error); +}); +``` + +if}} + +{{hint + +{{index "all function", "Promise constructor", "then method"}} + +The +function passed to the `Promise` constructor will have to call `then` +on each of the promises in the given array. When one of them succeeds, +two things need to happen. The resulting value needs to be stored in +the correct position of a result array, and we must check whether this +was the last pending ((promise)) and finish our own promise if it +was. + +{{index "counter variable"}} + +The latter can be done with a counter, which is +initialized to the length of the input array and from which we subtract +1 every time a promise succeeds. When it reaches 0, we are +done. 
Make sure you take the situation where the input array is empty +(and thus no promise will ever resolve) into account. + +Handling failure requires some thought but turns out to be extremely +simple. Just pass the failure function of the wrapping promise to each +of the promises in the array so that a failure in one of them +triggers the failure of the whole wrapper. + +hint}} + diff --git a/18_forms.md b/18_forms.md new file mode 100644 index 000000000..6d0495900 --- /dev/null +++ b/18_forms.md @@ -0,0 +1,1062 @@ +{{meta {chap_num: 18, prev_link: 17_http, next_link: 19_paint, load_files: ["code/promise.js"]}}} + +# Forms and Form Fields + +{{quote {author: "Mephistopheles, in Goethe's Faust", chapter: true} + +I shall this very day, at Doctor's feast, + +My bounden service duly pay thee. + +But one thing!—For insurance’ sake, I pray thee, + +Grant me a line or two, at least. + +quote}} + +{{index "Goethe, Johann Wolfgang von", Mephistopheles, "page reload", form}} + +Forms were introduced briefly in the +[previous chapter](17_http.html#http_forms) as a way to +_((submit))_ information provided by the user over ((HTTP)). They were +designed for a pre-JavaScript Web, assuming that interaction with the +server always happens by navigating to a new page. + +But their elements are part of the ((DOM)) like the rest of the page, +and the DOM elements that represent form ((field))s support a number +of properties and events that are not present on other elements. These +make it possible to inspect and control such input fields with JavaScript programs +and do things such as adding functionality to a traditional form or using forms +and fields as building blocks in a JavaScript application. + +## Fields + +{{index "form (HTML tag)"}} + +A web form consists of any number of input +((field))s grouped in a `<form>` tag. HTML allows a number of +different styles of fields, ranging from simple on/off checkboxes to +drop-down menus and fields for text input. 
This book won't try to +comprehensively discuss all field types, but we will start with a rough +overview. + +{{index "input (HTML tag)", "type attribute"}} + +A lot of field types use the +`<input>` tag. This tag's `type` attribute is used to select the +field's style. These are some commonly used `<input>` types: + +{{index "password field", checkbox, "radio button", "file field"}} + +[cols="1,5"] +|==== +|`text` |A single-line ((text field)) +|`password`|Same as `text` but hides the text that is typed +|`checkbox`|An on/off switch +|`radio` |(Part of) a ((multiple-choice)) field +|`file` |Allows the user to choose a file from their computer +|==== + +{{index "value attribute", "checked attribute", "form (HTML tag)"}} + +Form +fields do not necessarily have to appear in a `<form>` tag. You can +put them anywhere in a page. Such fields cannot be ((submit))ted +(only a form as a whole can), but when responding to input with +JavaScript, we often do not want to submit our fields normally anyway. + +```text/html +

    (text)

    +

    (password)

    +

    (checkbox)

    +

    + + (radio)

    +

    (file)

    +``` + +{{if book + +The fields created with this HTML code look like this: + +{{figure {url: "img/form_fields.png", alt: "Various types of input tags",width: "4cm"}}} + +if}} + +The JavaScript interface for such elements differs with the type of +the element. We'll go over each of them later in the chapter. + +{{index "textarea (HTML tag)", "text field"}} + +Multiline text fields have +their own tag, `` closing tag and uses the text +between those two, instead of using its `value` attribute, as starting +text. + +```text/html + +``` + +{{index "select (HTML tag)", "option (HTML tag)", "multiple choice", "drop-down menu"}} + +Finally, the ` + + + + +``` + +{{if book + +Such a field looks like this: + +{{figure {url: "img/form_select.png", alt: "A select field",width: "4cm"}}} + +if}} + +{{index "change event"}} + +Whenever the value of a form field changes, it fires +a `"change"` event. + +## Focus + +{{index keyboard, focus}} + +{{indexsee "keyboard focus", focus}} + +Unlike most elements in an HTML document, +form fields can get _keyboard ((focus))_. When clicked—or activated in +some other way—they become the currently active element, the main +recipient of keyboard ((input)). + +{{index "option (HTML tag)", "select (HTML tag)"}} + +If a document has a +((text field)), text typed will end up in there only when the field is +focused. Other fields respond differently to keyboard events. For +example, a ` + +``` + +{{index "autofocus attribute"}} + +For some pages, the user is expected to +want to interact with a form field immediately. +JavaScript can be used to ((focus)) this field when the document is +loaded, but HTML also provides the `autofocus` attribute, which +produces the same effect but lets the browser know what we are trying +to achieve. This makes it possible for the browser to disable the +behavior when it is not appropriate, such as when the user has focused +something else. 
+ +```text/html focus + +``` + +{{index "tab key", keyboard, "tabindex attribute", "a (HTML tag)"}} + +Browsers traditionally also allow the user to move the focus +through the document by pressing the Tab key. We can influence the +order in which elements receive focus with the `tabindex` attribute. +The following example document will let focus jump from the text input to +the OK button, rather than going through the help link first: + +```text/html focus + (help) + +``` + +{{index "tabindex attribute"}} + +By default, most types of HTML elements cannot +be focused. But you can add a `tabindex` attribute to any element, +which will make it focusable. + +## Disabled fields + +{{index "disabled attribute"}} + +All ((form)) ((field))s can be _disabled_ +through their `disabled` attribute, which also exists as a property on +the element's DOM object. + +```text/html + + +``` + +Disabled fields cannot be ((focus))ed or changed, and unlike active +fields, they usually look gray and faded. + +{{if book + +{{figure {url: "img/button_disabled.png", alt: "A disabled button",width: "3cm"}}} + +if}} + +{{index "user experience", "asynchronous programming"}} + +When a program is +in the process of handling an action caused by some ((button)) or other control, +which might require communication with the server and thus take a +while, it can be a good idea to +disable the control until the action finishes. That way, when the user +gets impatient and clicks it again, they don't accidentally repeat +their action. + +## The form as a whole + +{{index "array-like object", "form (HTML tag)", "form property", "elements property"}} + +When a ((field)) is contained in a +`<form>` element, its DOM element will have a property `form` linking +back to the form's DOM element. The `<form>` element, in turn, has a +property called `elements` that contains an array-like collection of the fields +inside it.
+ +{{index "elements property", "name attribute"}} + +The `name` attribute of a +form field determines the way its value will be identified when the +form is ((submit))ted. It can also be used as a property name when +accessing the form's `elements` property, which acts both as an +array-like object (accessible by number) and a ((map)) (accessible by +name). + +```text/html + + Name:
    + Password:
    + + + +``` + +{{index "button (HTML tag)", "type attribute", submit, "Enter key"}} + +A button with a `type` attribute of `submit` will, when pressed, +cause the form to be submitted. Pressing Enter when a form field is +focused has the same effect. + +{{index "submit event", "event handling", "preventDefault method", "page reload", "GET method", "POST method"}} + +Submitting +a ((form)) normally means that the +((browser)) navigates to the page indicated by the form's `action` +attribute, using either a `GET` or a `POST` ((request)). But before +that happens, a `"submit"` event is fired. This event can be handled +by JavaScript, and the handler can prevent the default behavior by +calling `preventDefault` on the event object. + +```text/html +
    + Value: + +
    + +``` + +{{index "submit event", validation, XMLHttpRequest}} + +Intercepting +`"submit"` events in JavaScript has various uses. We can write code to +verify that the values the user entered make sense and immediately +show an error message instead of submitting the form when they don't. +Or we can disable the regular way of submitting the form entirely, as +in the previous example, and have our program handle the input, possibly +using `XMLHttpRequest` to send it over to a server without reloading +the page. + +## Text fields + +{{index "value attribute", "input (HTML tag)", "text field", "textarea (HTML tag)"}} + +Fields created by `<input>` tags with a type of `text` or +`password`, as well as `textarea` tags, share a common ((interface)). +Their ((DOM)) elements have a `value` property that holds their +current content as a string value. Setting this property to another string +changes the field's content. + +{{index "selectionStart property", "selectionEnd property"}} + +The +`selectionStart` and `selectionEnd` properties of ((text field))s give +us information about the ((cursor)) and ((selection)) in the ((text)). +When nothing is selected, these two properties hold the same number, +indicating the position of the cursor. For example, 0 indicates the +start of the text, and 10 indicates the cursor is after the 10^th^ ((character)). +When part of the field is selected, the two properties will differ, giving us the +start and end of the selected text. Like `value`, these properties may +also be written to. + +{{index Khasekhemwy, "textarea (HTML tag)", keyboard, "event handling"}} + +As an example, imagine you +are writing an article about Khasekhemwy but have some +trouble spelling his name.
The following code wires up a ` + +``` + +{{index "replaceSelection function", "text field"}} + +The `replaceSelection` +function replaces the currently selected part of a text field's +content with the given word and then moves the ((cursor)) after that +word so that the user can continue typing. + +{{index "change event", "input event"}} + +The `"change"` event for a ((text +field)) does not fire every time something is typed. Rather, it +fires when the field loses ((focus)) after its content was changed. +To respond immediately to changes in a text field, you should register +a handler for the `"input"` event instead, which fires for every +time the user types a character, deletes text, or otherwise manipulates +the field's content. + +The following example shows a text field and a counter showing the current +length of the text entered: + +```text/html + length: 0 + +``` + +## Checkboxes and radio buttons + +{{index "input (HTML tag)", "checked attribute"}} + +A ((checkbox)) field is a +simple binary toggle. Its value can be extracted or changed through +its `checked` property, which holds a Boolean value. + +```text/html + + + +``` + +{{index "for attribute", "id attribute", focus, "label (HTML tag)", labeling}} + +The `
    `). The same source data that was used in [Chapter 6](06_object.html#mountains) is again available in the `MOUNTAINS` variable in the sandbox. It can also be http://eloquentjavascript.net/code/mountains.js[downloaded] -from the website(!book (http://eloquentjavascript.net/code#13[_eloquentjavascript.net/code#13_])!). +from the website[(http://eloquentjavascript.net/code#13[_eloquentjavascript.net/code#13_])]{if book}. Write a function `buildTable` that, given an array of objects that all have the same set of properties, builds up a DOM structure diff --git a/15_game.md b/15_game.md index e8923b24e..b03a6fb77 100644 --- a/15_game.md +++ b/15_game.md @@ -38,7 +38,7 @@ from the side, and do lots of jumping onto and over things. Our ((game)) will be roughly based on -http://www.lessmilk.com/games/10[Dark Blue](!book (_www.lessmilk.com/games/10_)!) by Thomas Palef. I chose this game +http://www.lessmilk.com/games/10[Dark Blue][ (_www.lessmilk.com/games/10_)]{if book} by Thomas Palef. I chose this game because it is both entertaining and minimalist, and because it can be built without too much ((code)). It looks like this: @@ -1269,8 +1269,8 @@ and output in general in [Chapter 20](20_node.html#node). {{index game, "GAME_LEVELS data set"}} There is a set of -((level)) plans available in the `GAME_LEVELS` variable (!book (downloadable from -http://eloquentjavascript.net/code#15[_eloquentjavascript.net/code#15_])!). +((level)) plans available in the `GAME_LEVELS` variable [(downloadable from +http://eloquentjavascript.net/code#15[_eloquentjavascript.net/code#15_])]{if book}. This page feeds them to `runGame`, starting an actual game: // start_code diff --git a/16_canvas.md b/16_canvas.md index 6c52ffa21..5b5455ee5 100644 --- a/16_canvas.md +++ b/16_canvas.md @@ -1548,8 +1548,8 @@ going from zero to more than a whole circle, and fill it. 
To model the ball's position and ((speed)), you can use the `Vector` type from -[Chapter 15](15_game.html#vector)(!interactive (which is available on this -page)!). Give it a starting speed, preferably one that is not purely +[Chapter 15](15_game.html#vector)[(which is available on this +page)]{if interactive}. Give it a starting speed, preferably one that is not purely vertical or horizontal, and every ((frame)), multiply that speed with the amount of time that elapsed. When the ball gets too close to a vertical wall, invert the x component in its speed. Likewise, invert diff --git a/19_paint.md b/19_paint.md index ef6e43128..837781129 100644 --- a/19_paint.md +++ b/19_paint.md @@ -668,7 +668,7 @@ call `Math.sin` and `Math.cos` to create the corresponding point. But with that the dots are more likely to appear near the center of the circle. There are other ways around that, but they're more complicated than the previous loop. -We now have a functioning paint program.(!interactive Run the code below to try it.!) +We now have a functioning paint program.[ Run the code below to try it.]{if interactive} {{if interactive diff --git a/21_skillsharing.md b/21_skillsharing.md index 5e51bcbd9..e13ab69c5 100644 --- a/21_skillsharing.md +++ b/21_skillsharing.md @@ -32,9 +32,9 @@ central organizer. {{figure {url: "img/unicycle.svg", alt: "The unicycling meetup"}}} -(!interactive Just like in the [previous chapter](20_node.html#node), the +[Just like in the [previous chapter](20_node.html#node), the code in this chapter is written for Node.js, and running it directly -in the HTML page that you are looking at is unlikely to work. !)The +in the HTML page that you are looking at is unlikely to work.]{if interactive}The full code for the project can be ((download))ed from http://eloquentjavascript.net/code/skillsharing.zip[_eloquentjavascript.net/code/skillsharing.zip_]. 
diff --git a/src/convert.js b/src/convert.js index 4db182469..ce237f228 100644 --- a/src/convert.js +++ b/src/convert.js @@ -77,6 +77,9 @@ text = text .replace(/\n!!hint!!\n([^]+?)\n!!hint!!/g, function(_, content) { return `\n{{hint\n${content}\nhint}}` }) + .replace(/\(!(html|book|tex|interactive) ([^]*?)\s?!\)/g, function(_, tag, content) { + return `[${content}]{if ${tag}}` + }) .replace(/\[sic]/, "\\[sic]") console.log(text) diff --git a/src/transform.js b/src/transform.js index 52aae8529..4ac0e22eb 100644 --- a/src/transform.js +++ b/src/transform.js @@ -45,12 +45,21 @@ function smartQuotes(tokens, i) { .slice(from, to) } +function handleIf(tokens, i, options) { + let tag = tokens[i].args[0] + if (options.defined.indexOf(tag) > -1) return i + for (let j = i + 1; j < tokens.length; j++) if (tokens[j].type == "meta_if_close" && tokens[j].args[0] == tag) + return j +} + function transformInline(tokens, options) { let result = [] for (let i = 0; i < tokens.length; i++) { let tok = tokens[i], type = tok.type - if (options.index === false && type == "meta_index") { + if (type == "meta_if_close" || (options.index === false && type == "meta_index")) { // Drop + } else if (type == "meta_if_open") { + i = handleIf(tokens, i, options) } else { if (type == "text" && /[\'\"]/.test(tok.content)) tok.content = smartQuotes(tokens, i) result.push(tok) @@ -71,13 +80,7 @@ exports.transformTokens = function(tokens, options) { break } } else if (type == "meta_if_open") { - let tag = tok.args[0] - if (options.defined.indexOf(tag) == -1) { - for (let j = i + 1; j < tokens.length; j++) if (tokens[j].type == "meta_if_close" && tokens[j].args[0] == tag) { - i = j - break - } - } + i = handleIf(tokens, i, options) } else if (type == "meta_if_close" || (options.index === false && (type == "meta_indexsee" || type == "meta_index"))) { // Drop } else if (tok.tag == "h1") { From 317b8ac87805a73036e6f6196bdde2ec7157c944 Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Fri, 29 Sep 2017 
15:06:32 +0200 Subject: [PATCH 028/780] Remove obsolete scripts and non-html targets (The targets will be re-added in the future.) --- 00_intro.txt | 482 ----------- 01_values.txt | 622 --------------- 02_program_structure.txt | 1032 ------------------------ 03_functions.txt | 1033 ------------------------ 04_data.txt | 1464 ---------------------------------- 05_higher_order.txt | 1100 ------------------------- 06_object.txt | 1222 ---------------------------- 07_elife.txt | 1240 ----------------------------- 08_error.txt | 857 -------------------- 09_regexp.txt | 1323 ------------------------------- 10_modules.txt | 931 ---------------------- 11_language.txt | 881 -------------------- 12_browser.txt | 396 --------- 13_dom.txt | 1171 --------------------------- 14_event.txt | 1127 -------------------------- 15_game.txt | 1373 -------------------------------- 16_canvas.txt | 1499 ----------------------------------- 17_http.txt | 991 ----------------------- 18_forms.txt | 966 ---------------------- 19_paint.txt | 996 ----------------------- 20_node.txt | 1264 ----------------------------- 21_skillsharing.txt | 1342 ------------------------------- asciidoc_epub.conf | 128 --- asciidoc_html.conf | 155 ---- asciidoc_nostarch.conf | 128 --- asciidoc_pdf.conf | 136 ---- bin/.tern-project | 3 - bin/add_images_to_epub.js | 24 - bin/addmarks | 51 -- bin/blockfilter | 88 -- bin/build_code.js | 26 - bin/chapter_info.js | 226 ------ bin/clean_latex.js | 77 -- bin/extract_hints.js | 25 - bin/pre_epub.js | 30 - bin/pre_latex.js | 77 -- bin/wrap.tex | 44 - epub/META-INF/container.xml | 6 - epub/content.opf.src | 76 -- epub/font/cinzel_bold.otf | Bin 128156 -> 0 bytes epub/font/pt_mono.otf | Bin 80084 -> 0 bytes epub/frontmatter.xhtml | 38 - epub/mimetype | 1 - epub/style.css | 136 ---- epub/titlepage.xhtml | 16 - epub/toc.xhtml | 77 -- nostarch/book.tex | 143 ---- nostarch/build.sh | 7 - nostarch/nostarch.cls | 740 ----------------- nostarch/nostarch.ins | 33 - 
nostarch/nostarch.ist | 46 -- nostarch/nshyper.sty | 65 -- pdf/book.tex | 142 ---- pdf/build.sh | 7 - 54 files changed, 26063 deletions(-) delete mode 100644 00_intro.txt delete mode 100644 01_values.txt delete mode 100644 02_program_structure.txt delete mode 100644 03_functions.txt delete mode 100644 04_data.txt delete mode 100644 05_higher_order.txt delete mode 100644 06_object.txt delete mode 100644 07_elife.txt delete mode 100644 08_error.txt delete mode 100644 09_regexp.txt delete mode 100644 10_modules.txt delete mode 100644 11_language.txt delete mode 100644 12_browser.txt delete mode 100644 13_dom.txt delete mode 100644 14_event.txt delete mode 100644 15_game.txt delete mode 100644 16_canvas.txt delete mode 100644 17_http.txt delete mode 100644 18_forms.txt delete mode 100644 19_paint.txt delete mode 100644 20_node.txt delete mode 100644 21_skillsharing.txt delete mode 100644 asciidoc_epub.conf delete mode 100644 asciidoc_html.conf delete mode 100644 asciidoc_nostarch.conf delete mode 100644 asciidoc_pdf.conf delete mode 100644 bin/.tern-project delete mode 100644 bin/add_images_to_epub.js delete mode 100755 bin/addmarks delete mode 100755 bin/blockfilter delete mode 100644 bin/build_code.js delete mode 100644 bin/chapter_info.js delete mode 100644 bin/clean_latex.js delete mode 100644 bin/extract_hints.js delete mode 100644 bin/pre_epub.js delete mode 100644 bin/pre_latex.js delete mode 100644 bin/wrap.tex delete mode 100644 epub/META-INF/container.xml delete mode 100644 epub/content.opf.src delete mode 100644 epub/font/cinzel_bold.otf delete mode 100644 epub/font/pt_mono.otf delete mode 100644 epub/frontmatter.xhtml delete mode 100644 epub/mimetype delete mode 100644 epub/style.css delete mode 100644 epub/titlepage.xhtml delete mode 100644 epub/toc.xhtml delete mode 100644 nostarch/book.tex delete mode 100644 nostarch/build.sh delete mode 100644 nostarch/nostarch.cls delete mode 100644 nostarch/nostarch.ins delete mode 100644 nostarch/nostarch.ist delete 
mode 100644 nostarch/nshyper.sty delete mode 100644 pdf/book.tex delete mode 100644 pdf/build.sh diff --git a/00_intro.txt b/00_intro.txt deleted file mode 100644 index 603b0b0dc..000000000 --- a/00_intro.txt +++ /dev/null @@ -1,482 +0,0 @@ -:next_link: 01_values -:load_files: ["code/intro.js"] - -= Introduction = - -This is a book about getting ((computer))s to do what you want them to -do. Computers are about as common as screwdrivers today, but they contain a -lot more hidden complexity and thus are harder to operate and -understand. To many, they remain alien, slightly threatening things. - -image::img/generated/computer.png[alt="Communicating with a computer"] - -(((graphical user interface)))We've found two effective ways of -bridging the communication gap between us, squishy biological -organisms with a talent for social and spatial reasoning, and -computers, unfeeling manipulators of meaningless data. The first is to -appeal to our sense of the physical world and build interfaces that -mimic that world and allow us to manipulate shapes on a screen with -our fingers. This works very well for casual machine interaction. - -(((programming language)))But we have not yet found a good way to use -the point-and-click approach to communicate things to the computer -that the designer of the interface did not anticipate. For open-ended -interfaces, such as instructing the computer to perform arbitrary -tasks, we've had more luck with an approach that makes use of our -talent for language: teaching the machine a language. - -(((human language)))(((expressivity)))Human languages allow words and -phrases to be combined in many ways, which allows us to say -many different things. Computer languages, though typically less -grammatically flexible, follow a similar principle. 
- -(((JavaScript,availability of)))(((casual computing)))Casual computing -has become much more widespread in the past 20 years, and -language-based interfaces, which once were the default way in which -people interacted with computers, have largely been replaced with -graphical interfaces. But they are still there, if you know where to -look. One such language, JavaScript, is built into almost every -web ((browser)) and is thus available on just about every consumer -device. - -indexsee:[web browser,browser]This book intends to make you familiar -enough with this language to be able to make a computer do what you -want. - -== On programming == - -[quote, Confucius] -____ -(((Confucius)))I do not enlighten those who are not eager to learn, -nor arouse those who are not anxious to give an explanation -themselves. If I have presented one corner of the square and they -cannot come back to me with the other three, I should not go over the -points again. -____ - -(((programming,difficulty of)))Besides explaining JavaScript, I also -will introduce the basic principles of programming. Programming, it -turns out, is hard. The fundamental rules are typically simple and -clear. But programs built on top of these rules tend to become complex -enough to introduce their own rules and complexity. You're building -your own maze, in a way, and you might just get lost in it. - -(((learning)))There will be times when reading this book feels terribly -frustrating. If you are new to programming, there will be a lot of new -material to digest. Much of this material will then be _combined_ in -ways that require you to make additional connections. - -It is up to you to make the necessary effort. When you are struggling -to follow the book, do not jump to any conclusions about your own -capabilities. You are fine—you just need to keep at it. Take a break, -reread some material, and _always_ make sure you read and understand -the example programs and ((exercises)). 
Learning is hard work, but -everything you learn is yours and will make subsequent learning -easier. - -[quote, Joseph Weizenbaum, Computer Power and Human Reason] -____ -(((Weizenbaum+++,+++ Joseph)))The computer programmer is a creator of -universes for which he [sic] alone is responsible. Universes of virtually -unlimited complexity can be created in the form of computer programs. -____ - -(((program,nature of)))(((data)))A program is many things. It is a -piece of text typed by a programmer, it is the directing force that -makes the computer do what it does, it is data in the computer's -memory, yet it controls the actions performed on this same memory. -Analogies that try to compare programs to objects we are familiar with -tend to fall short. A superficially fitting one is that of a -machine—lots of separate parts tend to be involved, and to make the -whole thing tick, we have to consider the ways in which these parts -interconnect and contribute to the operation of the whole. - -(((computer)))A computer is a machine built to act as a host for these -immaterial machines. Computers themselves can do only stupidly -straightforward things. The reason they are so useful is that they do -these things at an incredibly high speed. A program can ingeniously -combine an enormous number of these simple actions in order to do very -complicated things. - -(((programming,joy of)))To some of us, writing computer programs is a -fascinating game. A program is a building of thought. It is costless -to build, it is weightless, and it grows easily under our typing -hands. - -But without care, a program's size and ((complexity)) will grow out of -control, confusing even the person who created it. Keeping programs -under control is the main problem of programming. When a program -works, it is beautiful. The art of programming is the skill of -controlling complexity. The great program is subdued—made simple in -its complexity. 
- -(((programming style)))(((best practices)))Many programmers believe -that this complexity is best managed by using only a small set of -well-understood techniques in their programs. They have composed -strict rules (“best practices”) prescribing the form programs should -have, and the more zealous among them will consider those who go -outside of this safe little zone to be _bad_ programmers. - -(((experiment)))(((learning)))What hostility to the richness of -programming—to try to reduce it to something straightforward and -predictable, to place a taboo on all the weird and beautiful programs! -The landscape of programming techniques is enormous, fascinating in -its diversity, and still largely unexplored. It is certainly dangerous -going, luring the inexperienced programmer into all kinds of -confusion, but that only means you should proceed with caution and -keep your wits about you. As you learn there will always be new -challenges and new territory to explore. Programmers who refuse to -keep exploring will stagnate, forget their joy, and get bored with -their craft. - -== Why language matters == - -(((programming language)))(((machine code)))(((binary data)))In the -beginning, at the birth of computing, there were no programming -languages. Programs looked something like this: - ----- -00110001 00000000 00000000 -00110001 00000001 00000001 -00110011 00000001 00000010 -01010001 00001011 00000010 -00100010 00000010 00001000 -01000011 00000001 00000000 -01000001 00000001 00000001 -00010000 00000010 00000000 -01100010 00000000 00000000 ----- - -(((programming,history of)))(((punch card)))(((complexity)))That is a -program to add the numbers from 1 to 10 together and print out the -result: `1 + 2 + ... + 10 = 55`. It could run on a simple, -hypothetical machine. To program early computers, it was necessary to -set large arrays of switches in the right position or punch holes in -strips of cardboard and feed them to the computer. 
You can probably imagine -how tedious and error-prone this procedure was. Even writing simple -programs required much cleverness and discipline. Complex ones were -nearly inconceivable. - -(((bit)))(((wizard (mighty))))Of course, manually entering these -arcane patterns of bits (the ones and zeros) did give the programmer -a profound sense of being a mighty wizard. And that has to be worth -something in terms of job satisfaction. - -(((memory)))(((instruction)))Each line of the previous program contains a -single instruction. It could be written in English like this: - -[source,text/plain] ----- -1. Store the number 0 in memory location 0. -2. Store the number 1 in memory location 1. -3. Store the value of memory location 1 in memory location 2. -4. Subtract the number 11 from the value in memory location 2. -5. If the value in memory location 2 is the number 0, - continue with instruction 9. -6. Add the value of memory location 1 to memory location 0. -7. Add the number 1 to the value of memory location 1. -8. Continue with instruction 3. -9. Output the value of memory location 0. ----- - -(((readability)))(((naming)))(((variable)))Although that is already -more readable than the soup of bits, it is still rather unpleasant. It -might help to use names instead of numbers for the instructions and -memory locations. - -[source,text/plain] ----- - Set “total” to 0. - Set “count” to 1. -[loop] - Set “compare” to “count”. - Subtract 11 from “compare”. - If “compare” is zero, continue at [end]. - Add “count” to “total”. - Add 1 to “count”. - Continue at [loop]. -[end] - Output “total”. ----- - -(((loop)))(((jump)))(((summing example)))Can you see how the program -works at this point? The first two lines give -two memory locations their starting values: `total` will be used to -build up the result of the computation, and `count` will keep track of the -number that we are currently looking at. The lines using `compare` are -probably the weirdest ones. 
The program wants to see -whether `count` is equal to 11 in order to decide whether it can stop -running. Because our hypothetical machine is rather primitive, it can only -test whether a number is zero and make a decision (or jump) based on -that. So it uses the memory location labeled `compare` to compute the -value of `count - 11` and makes a decision based on that value. The -next two lines add the value of `count` to the result and increment -`count` by 1 every time the program has decided that `count` is not 11 yet. - -Here is the same program in JavaScript: - -[source,javascript] ----- -var total = 0, count = 1; -while (count <= 10) { - total += count; - count += 1; -} -console.log(total); -// → 55 ----- - -(((while loop)))(((loop)))This version gives us a few more improvements. -Most importantly, there is no need to specify the way we want the -program to jump back and forth anymore. The `while` language -construct takes care of that. It continues executing the block -(wrapped in braces) below it as long as the condition it was given -holds. That condition is `count <= 10`, which means “++count++ is less than or equal to -10”. We no longer have to create a temporary value and compare that -to zero, which was an uninteresting detail. Part of the power of -programming languages is that they take care of uninteresting details -for us. - -(((console.log)))At the end of the program, after the `while` construct has -finished, the `console.log` operation is applied to the result in -order to write it as output. 
- -(((sum function)))(((range -function)))(((abstraction)))(((function)))Finally, here is what the -program could look like if we happened to have the convenient -operations `range` and `sum` available, which respectively create a -((collection)) of numbers within a range and compute the sum of a -collection of numbers: - -// start_code - -[source,javascript] ----- -console.log(sum(range(1, 10))); -// → 55 ----- - -(((readability)))The moral of this story is that the same program can -be expressed in long and short, unreadable and readable ways. The -first version of the program was extremely obscure, whereas this last -one is almost English: `log` the `sum` of the `range` of numbers from -1 to 10. (We will see in link:04_data.html#data[later chapters] how to -build operations like `sum` and `range`.) - -(((programming language,power of)))(((composability)))A good -programming language helps the programmer by allowing them to talk -about the actions that the computer has to perform on a higher level. -It helps omit uninteresting details, provides convenient building -blocks (such as `while` and `console.log`), allows you to define your -own building blocks (such as `sum` and `range`), and makes those blocks -easy to compose. - -== What is JavaScript? == - -indexsee:[WWW,World Wide Web] indexsee:[Web,World Wide Web](((history)))(((Netscape)))(((browser)))(((web -application)))(((JavaScript)))(((JavaScript,history of)))(((World Wide -Web))) JavaScript was introduced in 1995 as a way to add programs to -web pages in the Netscape Navigator browser. The language has since -been adopted by all other major graphical web browsers. It has made modern -web applications possible—applications with which you can interact -directly, without doing a page reload for every action. But it is also used in more -traditional websites to provide various forms of interactivity and -cleverness. 
- -(((Java)))(((naming)))It is important to note that JavaScript has -almost nothing to do with the programming language named Java. The -similar name was inspired by marketing considerations, rather than -good judgment. When JavaScript was being introduced, the Java language -was being heavily marketed and was gaining popularity. Someone -thought it was a good idea to try to ride along on this success. Now we -are stuck with the name. - -(((ECMAScript)))(((compatibility)))After its adoption outside of -Netscape, a ((standard)) document was written to describe the way the -JavaScript language should work to make sure the various pieces of -software that claimed to support JavaScript were actually talking -about the same language. This is called the ECMAScript standard, after -the Ecma International organization that did the standardization. In -practice, the terms ECMAScript and JavaScript can be used interchangeably—they -are two names for the same language. - -(((JavaScript,weaknesses of)))(((debugging)))There are those who will -say _terrible_ things about the JavaScript language. Many of these -things are true. When I was required to write something in JavaScript -for the first time, I quickly came to despise it. It would accept -almost anything I typed but interpret it in a way that was completely -different from what I meant. This had a lot to do with the fact that I -did not have a clue what I was doing, of course, but there is a real -issue here: JavaScript is ridiculously liberal in what it allows. The -idea behind this design was that it would make programming in -JavaScript easier for beginners. In actuality, it mostly makes finding -problems in your programs harder because the system will not point -them out to you. - -(((JavaScript,flexibility of)))(((flexibility)))This flexibility also -has its advantages, though. 
It leaves space for a lot of techniques -that are impossible in more rigid languages, and as you will see (for -example in link:10_modules.html#modules[Chapter 10]) it -can be used to overcome some of JavaScript's shortcomings. After -((learning)) the language properly and working with it for a while, I have -learned to actually _like_ JavaScript. - -(((future)))(((JavaScript,versions of)))(((ECMAScript)))(((ECMAScript -6)))There have been several versions of JavaScript. ECMAScript -version 3 was the widely supported version in the time of -JavaScript's ascent to dominance, roughly between 2000 and 2010. -During this time, work was underway on an ambitious version 4, which -planned a number of radical improvements and extensions to the -language. Changing a living, widely used language in such a radical -way turned out to be politically difficult, and work on the version 4 -was abandoned in 2008, leading to the much less ambitious version 5 -coming out in 2009. We're now at the point where all major -browsers support version 5, which is the language version that -this book will be focusing on. A version 6 is in the process of -being finalized, and some browsers are starting to support new -features from this version. - -(((JavaScript,uses of)))Web browsers are not the only platforms on -which JavaScript is used. Some databases, such as MongoDB and CouchDB, -use JavaScript as their scripting and query language. Several -platforms for desktop and server programming, most notably the -((Node.js)) project (the subject of link:20_node.html#node[Chapter -20]) are providing a powerful environment for programming JavaScript -outside of the browser. - -== Code, and what to do with it == - -(((reading code)))(((writing code)))Code is the text that makes up -programs. Most chapters in this book contain quite a lot of it. In my -experience, reading code and writing ((code)) are indispensable parts of -((learning)) to program, so try to not just glance over the examples. 
Read -them attentively and understand them. This may be slow and confusing -at first, but I promise that you will quickly get the hang of it. The -same goes for the ((exercises)). Don't assume you understand them -until you've actually written a working solution. - -(((interpretation)))I recommend you try your solutions to exercises -in an actual JavaScript interpreter. That way, you'll get immediate feedback on -whether what you are doing is working, and, I hope, you'll be -tempted to ((experiment)) and go beyond the exercises. - -ifdef::interactive_target[] - -When reading this book in your browser, you can edit (and run) all -example programs by clicking them. - -endif::interactive_target[] - -ifdef::book_target[] - -(((download)))(((sandbox)))(((running code)))The easiest way to run -the example code in the book, and to experiment with it, is to look it -up in the online version of the book at -http://eloquentjavascript.net/[_eloquentjavascript.net_]. There, you -can click any code example to edit and run it and to see the -output it produces. To work on the exercises, go to -http://eloquentjavascript.net/code[_eloquentjavascript.net/code_], -which provides starting code for each coding exercise and allows you -to look at the solutions. - -endif::book_target[] - -(((developer tools)))(((JavaScript console)))If you want to run the -programs defined in this book outside of the book's sandbox, some care -is required. Many examples stand on their own and should work in any -JavaScript environment. But code in later chapters is mostly written -for a specific environment (the browser or Node.js) and can run only -there. In addition, many chapters define bigger programs, and the -pieces of code that appear in them depend on each other or on external -files. The http://eloquentjavascript.net/code[sandbox] on the website -provides links to Zip files containing all of the scripts and data -files necessary to run the code for a given chapter. 
- -== Overview of this book == - -This book contains roughly three parts. The first 11 chapters discuss -the JavaScript language itself. The next eight chapters are about web -((browsers)) and the way JavaScript is used to program them. Finally, -two chapters are devoted to ((Node.js)), another environment to program -JavaScript in. - -Throughout the book, there are five _project chapters_, which describe -larger example programs to give you a taste of real programming. In -order of appearance, we will work through building an -link:07_elife.html#elife[artificial life simulation], a -link:11_language.html#language[programming language], a -link:15_game.html#game[platform game], a -link:19_paint.html#paint[paint program], and a -link:21_skillsharing.html#skillsharing[dynamic website]. - -The language part of the book starts with four chapters to introduce -the basic structure of the JavaScript language. They introduce -link:02_program_structure.html#program_structure[control structures] -(such as the `while` word you saw in this introduction), -link:03_functions.html#functions[functions] (writing your own -operations), and link:04_data.html#data[data structures]. After these, -you will be able to write simple programs. Next, Chapters -link:05_higher_order.html#higher_order[5] and -link:06_object.html#object[6] introduce techniques to use functions -and objects to write more _abstract_ code and thus keep complexity -under control. - -After a link:07_elife.html#elife[first project chapter], the first -part of the book continues with chapters on -link:08_error.html#error[error handling and fixing], on -link:09_regexp.html#regexp[regular expressions] (an important tool for -working with text data), and on -link:10_modules.html#modules[modularity]—another weapon against -complexity. The link:11_language.html#language[second project chapter] -concludes the first part of the book. 
- -The second part, Chapters link:12_browser.html#browser[12] to -link:19_paint.html#paint[19], describes the tools that browser -JavaScript has access to. You'll learn to display things on the screen -(Chapters link:13_dom.html#dom[13] and -link:16_canvas.html#canvas[16]), respond to user input (Chapters -link:14_event.html#event[14] and link:18_forms.html#forms[18]), and -communicate over the network (link:17_http.html#http[Chapter 17]). -There are again two project chapters in this part. - -After that, link:20_node.html#node[Chapter 20] describes Node.js, and -link:21_skillsharing.html#skillsharing[Chapter 21] builds a simple web -system using that tool. - -ifdef::commercial_target[] - -Finally, link:22_fast.html#fast[Chapter 22] describes some of the -considerations that come up when optimizing JavaScript programs for -speed. - -endif::commercial_target[] - -== Typographic conventions == - -(((factorial function)))In this book, text written in a `monospaced` -font will represent elements of programs—sometimes -they are self-sufficient fragments, and sometimes they just refer to -part of a nearby program. Programs (of which you have already seen a -few), are written as follows: - -[source,javascript] ----- -function fac(n) { - if (n == 0) - return 1; - else - return fac(n - 1) * n; -} ----- - -(((console.log)))Sometimes, in order to show the output that a program -produces, the expected output is written after it, with two slashes -and an arrow in front. - -[source,javascript] ----- -console.log(fac(8)); -// → 40320 ----- - -Good luck! diff --git a/01_values.txt b/01_values.txt deleted file mode 100644 index 97c7d442d..000000000 --- a/01_values.txt +++ /dev/null @@ -1,622 +0,0 @@ -:chap_num: 1 -:prev_link: 00_intro -:next_link: 02_program_structure -:docid: values - -= Values, Types, and Operators = - -[chapterquote="true"] -[quote, Master Yuan-Ma, The Book of Programming] -____ -Below the surface of the -machine, the program moves. 
Without effort, it expands and contracts. -In great harmony, electrons scatter and regroup. The forms on the -monitor are but ripples on the water. The essence stays invisibly -below. -____ - -(((Yuan-Ma)))(((Book of Programming)))(((binary -data)))(((data)))(((bit)))(((memory)))Inside the computer's world, -there is only data. You can read data, modify data, create new -data—but anything that isn't data simply does not exist. All this data -is stored as long sequences of bits and is thus fundamentally alike. - -(((CD)))(((signal)))Bits are any kind of two-valued things, usually -described as zeros and ones. Inside the computer, they take forms -such as a high or low electrical charge, a strong or weak signal, or a -shiny or dull spot on the surface of a CD. Any piece of discrete -information can be reduced to a sequence of zeros and ones and thus -represented in bits. - -(((binary number)))(((radix)))(((decimal number)))For example, think -about how you might show the number 13 in bits. It works the same way -you write decimal numbers, but instead of 10 different ((digit))s, you -have only 2, and the weight of each increases by a factor of 2 from -right to left. Here are the bits that make up the number 13, with the -weights of the digits shown below them: - ----- - 0 0 0 0 1 1 0 1 - 128 64 32 16 8 4 2 1 ----- - -So that's the binary number 00001101, or 8 + 4 + 1, which equals 13. - -== Values == - -(((memory)))(((volatile data storage)))(((hard drive)))Imagine a sea of -bits. An ocean of them. A typical modern computer has more than 30 -billion bits in its volatile data storage. Nonvolatile storage (the -hard disk or equivalent) tends to have yet a few orders of magnitude -more. - -image::img/bit-sea.png[alt="The Ocean of Bits"] - -To be able to work with such quantities of bits without getting lost, -you can separate them into chunks that represent pieces of -information. In a JavaScript environment, those chunks are called -_((value))s_. 
Though all values are made of bits, they play different -roles. Every value has a ((type)) that determines its role. There are -six basic types of values in JavaScript: numbers, strings, Booleans, -objects, functions, and undefined values. - -(((garbage collection)))To create a value, you must merely invoke its -name. This is convenient. You don't have to gather building material -for your values or pay for them. You just call for one, and _woosh_, -you have it. They are not created from thin air, of course. Every -value has to be stored somewhere, and if you want to use a gigantic -amount of them at the same time, you might run out of bits. -Fortunately, this is a problem only if you need them all -simultaneously. As soon as you no longer use a value, it will -dissipate, leaving behind its bits to be recycled as building material -for the next generation of values. - -This chapter introduces the atomic elements of JavaScript programs, -that is, the simple value types and the operators that can act on such -values. - -== Numbers == - -(((syntax)))(((number)))(((number,notation)))Values of the -_number_ type are, unsurprisingly, numeric values. In a JavaScript -program, they are written as follows: - -[source,javascript] ----- -13 ----- - -(((binary number)))Use that in a program, and it will cause the bit -pattern for the number 13 to come into existence inside the computer's -memory. - -(((number,representation)))(((bit)))JavaScript uses a fixed -number of bits, namely 64 of them, to store a single number value. -There are only so many patterns you can make with 64 bits, which means -that the amount of different numbers that can be represented is -limited. For _N_ decimal ((digit))s, the amount of numbers that can be -represented is 10^_N_^. Similarly, given 64 binary digits, you can -represent 2^64^ different numbers, which is about 18 quintillion (an -18 with 18 zeros after it). This is a lot. 
- -Computer memory used to be a lot smaller, and people tended to use -groups of 8 or 16 bits to represent their numbers. It was easy to -accidentally _((overflow))_ such small numbers—to end up with a number -that did not fit into the given amount of bits. Today, even personal -computers have plenty of memory, so you are free to use 64-bit chunks, -which means you need to worry about overflow only when dealing with -truly astronomical numbers. - -(((sign)))(((floating-point number)))(((fractional number)))(((sign bit)))Not -all whole numbers below 18 quintillion fit in a JavaScript number, -though. Those bits also store negative numbers, so one bit indicates -the sign of the number. A bigger issue is that nonwhole numbers must -also be represented. To do this, some of the bits are used to store -the position of the decimal point. The actual maximum whole number -that can be stored is more in the range of 9 quadrillion (15 zeros), -which is still pleasantly huge. - -(((number,notation)))Fractional numbers are written by using a -dot. - -[source,javascript] ----- -9.81 ----- - -(((exponent)))(((scientific notation)))(((number,notation)))For -very big or very small numbers, you can also use scientific notation -by adding an “e” (for “exponent”), followed by the exponent of the -number: - -[source,javascript] ----- -2.998e8 ----- - -That is 2.998 × 10^8^ = 299,800,000. - -(((pi)))(((number,precision of)))(((floating-point -number)))Calculations with whole numbers (also called _((integer))s_) -smaller than the aforementioned 9 quadrillion are guaranteed to always -be precise. Unfortunately, calculations with fractional numbers are -generally not. Just as π (pi) cannot be precisely expressed by a -finite number of decimal digits, many numbers lose some precision when -only 64 bits are available to store them. This is a shame, but it -causes practical problems only in specific situations. 
The important -thing is to be aware of it and treat fractional digital numbers as -approximations, not as precise values. - -=== Arithmetic === - -(((syntax)))(((operator)))(((binary -operator)))(((arithmetic)))(((addition)))(((multiplication))) The main -thing to do with numbers is arithmetic. Arithmetic operations such as -addition or multiplication take two number values and produce a new -number from them. Here is what they look like in JavaScript: - -[source,javascript] ----- -100 + 4 * 11 ----- - -(((operator,application)))(((asterisk)))(((plus -character)))(((pass:[*] operator)))(((+ operator)))The `+` and `*` -symbols are called _operators_. The first stands for addition, and the -second stands for multiplication. Putting an operator between two -values will apply it to those values and produce a new value. - -(((grouping)))(((parentheses)))(((precedence)))Does the example mean -“add 4 and 100, and multiply the result by 11”, or is the -multiplication done before the adding? As you might have guessed, the -multiplication happens first. But as in mathematics, you can change -this by wrapping the addition in parentheses. - -[source,javascript] ----- -(100 + 4) * 11 ----- - -(((dash character)))(((slash -character)))(((division)))(((subtraction)))(((minus)))(((- -operator)))(((/ operator)))For subtraction, there is the `-` operator, -and division can be done with the `/` operator. - -When operators appear together without parentheses, the order in which -they are applied is determined by the _((precedence))_ of the -operators. The example shows that multiplication comes before -addition. The `/` operator has the same precedence as `*`. Likewise -for `+` and `-`. When multiple operators with the same precedence -appear next to each other, as in `1 - 2 + 1`, they are applied left -to right: `(1 - 2) + 1`. - -These rules of precedence are not something you should worry about. -When in doubt, just add parentheses. 
- -(((modulo operator)))(((division)))(((remainder operator)))(((% -operator)))There is one more arithmetic operator, which you might not -immediately recognize. The `%` symbol is used to represent the -_remainder_ operation. `X % Y` is the remainder of dividing `X` by -`Y`. For example, `314 % 100` produces `14`, and `144 % 12` gives `0`. -Remainder's precedence is the same as that of multiplication and -division. You'll often see this operator referred to as _modulo_, -though technically _remainder_ is more accurate. - -=== Special numbers === - -(((number,special values)))There are three special values in -JavaScript that are considered numbers but don't behave like normal -numbers. - -(((infinity)))The first two are `Infinity` and `-Infinity`, which -represent the positive and negative infinities. `Infinity - 1` is -still `Infinity`, and so on. Don't put too much trust in -infinity-based computation. It isn't mathematically solid, and it will -quickly lead to our next special number: `NaN`. - -(((NaN)))(((not a number)))(((division by zero)))`NaN` stands for “not -a number”, even though it is a value of the number type. You'll get -this result when you, for example, try to calculate `0 / 0` (zero -divided by zero), `Infinity - Infinity`, or any number of other -numeric operations that don't yield a precise, meaningful result. - -== Strings == - -(((syntax)))(((text)))(((character)))(((string,notation)))(((single-quote -character)))(((double-quote character)))(((quotation mark)))The next -basic data type is the _((string))_. Strings are used to represent -text. They are written by enclosing their content in quotes. - -[source,javascript] ----- -"Patch my boat with chewing gum" -'Monkeys wave goodbye' ----- - -Both single and double quotes can be used to mark strings as long as -the quotes at the start and the end of the string match. 
- -(((line break)))(((newline character)))Almost anything can be put -between quotes, and JavaScript will make a string value out of it. But -a few characters are more difficult. You can imagine how putting -quotes between quotes might be hard. _Newlines_ (the characters you -get when you press Enter) also can't be put between quotes. The string -has to stay on a single line. - -(((escaping,in strings)))(((backslash character)))To make it possible to include -such characters in a string, the following notation is used: whenever -a backslash (`\`) is found inside quoted text, it indicates that the -character after it has a special meaning. This is called _escaping_ -the character. A quote that is preceded by a backslash will not end -the string but be part of it. When an `n` character occurs after a -backslash, it is interpreted as a newline. Similarly, a `t` after a -backslash means a ((tab character)). Take the following string: - -[source,javascript] ----- -"This is the first line\nAnd this is the second" ----- - -The actual text contained is this: - ----- -This is the first line -And this is the second ----- - -There are, of course, situations where you want a backslash in a -string to be just a backslash, not a special code. If two backslashes -follow each other, they will collapse together, and only one will be -left in the resulting string value. This is how the string “++A newline -character is written like "\n".++” can be expressed: - -[source,javascript] ----- -"A newline character is written like \"\\n\"." ----- - -(((+ operator)))(((concatenation)))Strings cannot be divided, -multiplied, or subtracted, but the `+` operator _can_ be used on them. -It does not add, but it __concatenates__—it glues two strings together. 
-The following line will produce the string `"concatenate"`: - -[source,javascript] ----- -"con" + "cat" + "e" + "nate" ----- - -There are more ways of manipulating strings, which we will discuss -when we get to methods in link:04_data.html#methods[Chapter 4]. - -== Unary operators == - -(((operator)))(((typeof operator)))(((type)))Not all operators are -symbols. Some are written as words. One example is the `typeof` -operator, which produces a string value naming the type of the value -you give it. - -[source,javascript] ----- -console.log(typeof 4.5) -// → number -console.log(typeof "x") -// → string ----- - -[[console.log]] - -(((console.log)))(((output)))(((JavaScript console)))We will use -`console.log` in example code to indicate that we want to see the -result of evaluating something. When you run such code, the value -produced should be shown on the screen, though how it appears will -depend on the JavaScript environment you use to run it. - -(((negation)))(((- operator)))(((binary operator)))(((unary -operator)))The other operators we saw all operated on two values, but -`typeof` takes only one. Operators that use two values are called -_binary_ operators, while those that take one are called _unary_ -operators. The minus operator can be used both as a binary operator -and as a unary operator. - -[source,javascript] ----- -console.log(- (10 - 2)) -// → -8 ----- - -== Boolean values == - -(((Boolean)))(((operator)))(((true)))(((false)))(((bit)))Often, -you will need a value that simply distinguishes between two -possibilities, like “yes” and “no” or “on” and “off”. For this, -JavaScript has a _Boolean_ type, which has just two values: true and -false (which are written simply as those words). 
- -=== Comparisons === - -(((comparison)))Here is one way to produce Boolean values: - -[source,javascript] ----- -console.log(3 > 2) -// → true -console.log(3 < 2) -// → false ----- - -(((comparison,of numbers)))(((> operator)))(((< operator)))(((greater -than)))(((less than)))The `>` and `<` signs are the traditional -symbols for “is greater than” and “is less than”, respectively. They -are binary operators. Applying them results in a Boolean value that -indicates whether they hold true in this case. - -Strings can be compared in the same way. - -[source,javascript] ----- -console.log("Aardvark" < "Zoroaster") -// → true ----- - -(((comparison,of strings)))The way strings are ordered is more or less -alphabetic: uppercase letters are always “less” than lowercase ones, -so `"Z" < "a"` is true, and non-alphabetic characters (!, -, and so on) -are also included in the ordering. The actual comparison is based on -the _((Unicode))_ standard. This standard assigns a number to -virtually every character you would ever need, including characters -from Greek, Arabic, Japanese, Tamil, and so on. Having such numbers is -useful for storing strings inside a computer because it makes it -possible to represent them as a sequence of numbers. When comparing -strings, JavaScript goes over them from left to right, comparing the -numeric codes of the characters one by one. - -(((equality)))(((>= operator)))(((pass:[<=] operator)))(((== -operator)))(((!= operator)))Other similar operators are `>=` (greater -than or equal to), `<=` (less than or equal to), `==` (equal to), and -`!=` (not equal to). - -[source,javascript] ----- -console.log("Itchy" != "Scratchy") -// → true ----- - -(((comparison,of NaN)))(((NaN)))There is only one value in JavaScript -that is not equal to itself, and that is `NaN`, which stands for “not -a number”. 
- -[source,javascript] ----- -console.log(NaN == NaN) -// → false ----- - -`NaN` is supposed to denote the result of a nonsensical computation, -and as such, it isn't equal to the result of any _other_ nonsensical -computations. - -=== Logical operators === - -(((reasoning)))(((logical operators)))There are also some operations -that can be applied to Boolean values themselves. JavaScript supports -three logical operators: _and_, _or_, and _not_. These can be used to -“reason” about Booleans. - -(((&& operator)))(((logical and)))The `&&` operator represents logical -_and_. It is a binary operator, and its result is true only if both -the values given to it are true. - -[source,javascript] ----- -console.log(true && false) -// → false -console.log(true && true) -// → true ----- - -(((|| operator)))(((logical or)))The `||` operator denotes logical -_or_. It produces true if either of the values given to it is true. - -[source,javascript] ----- -console.log(false || true) -// → true -console.log(false || false) -// → false ----- - -(((negation)))(((! operator)))_Not_ is written as an exclamation mark -(`!`). It is a unary operator that flips the value given to it—`!true` -produces `false` and `!false` gives `true`. - -(((precedence)))When mixing these Boolean operators with arithmetic -and other operators, it is not always obvious when parentheses are -needed. In practice, you can usually get by with knowing that of the -operators we have seen so far, `||` has the lowest precedence, then -comes `&&`, then the comparison operators (`>`, `==`, and so on), and -then the rest. 
This order has been chosen such that, in typical -expressions like the following one, as few parentheses as possible are -necessary: - -[source,javascript] ----- -1 + 1 == 2 && 10 * 10 > 50 ----- - -(((conditional execution)))(((ternary operator)))(((?: -operator)))(((conditional operator)))(((colon character)))(((question -mark)))The last logical operator I will discuss is not unary, not -binary, but _ternary_, operating on three values. It is written with a -question mark and a colon, like this: - -[source,javascript] ----- -console.log(true ? 1 : 2); -// → 1 -console.log(false ? 1 : 2); -// → 2 ----- - -This one is called the _conditional_ operator (or sometimes just -_ternary_ operator since it is the only such operator in the -language). The value on the left of the question mark “picks” which of -the other two values will come out. When it is true, the middle value -is chosen, and when it is false, the value on the right comes out. - -== Undefined values == - -(((undefined)))(((null)))There are two special values, written `null` -and `undefined`, that are used to denote the absence of a meaningful -value. They are themselves values, but they carry no -information. - -Many operations in the language that don't produce a meaningful value -(you'll see some later) yield `undefined` simply because they have to -yield _some_ value. - -The difference in meaning between `undefined` and `null` is an accident -of JavaScript's design, and it doesn't matter most of the time. In the cases -where you actually have to concern yourself with these values, I -recommend treating them as interchangeable (more on that in a moment). - -== Automatic type conversion == - -(((NaN)))(((type coercion)))In the introduction, I mentioned that -JavaScript goes out of its way to accept almost any program you give -it, even programs that do odd things. 
This is nicely demonstrated by -the following expressions: - -[source,javascript] ----- -console.log(8 * null) -// → 0 -console.log("5" - 1) -// → 4 -console.log("5" + 1) -// → 51 -console.log("five" * 2) -// → NaN -console.log(false == 0) -// → true ----- - -(((+ operator)))(((arithmetic)))(((pass:[*] operator)))(((- -operator)))When an operator is applied to the “wrong” type of value, -JavaScript will quietly convert that value to the type it wants, using -a set of rules that often aren't what you want or expect. This is -called _((type coercion))_. So the `null` in the first expression becomes -`0`, and the `"5"` in the second expression becomes `5` (from string -to number). Yet in the third expression, `+` tries string -concatenation before numeric addition, so the `1` is converted to -`"1"` (from number to string). - -(((type coercion)))(((number,conversion to)))When something that -doesn't map to a number in an obvious way (such as `"five"` or -`undefined`) is converted to a number, the value `NaN` is produced. -Further arithmetic operations on `NaN` keep producing `NaN`, so if you -find yourself getting one of those in an unexpected place, look for -accidental type conversions. - -(((null)))(((undefined)))(((comparison,of undefined values)))(((== -operator)))When comparing values of the same type using `==`, the -outcome is easy to predict: you should get true when both values are -the same, except in the case of `NaN`. But when the types differ, -JavaScript uses a complicated and confusing set of rules to determine -what to do. In most cases, it just tries to convert one of the values -to the other value's type. However, when `null` or `undefined` occurs -on either side of the operator, it produces true only if both sides -are one of `null` or `undefined`. - -[source,javascript] ----- -console.log(null == undefined); -// → true -console.log(null == 0); -// → false ----- - -That last piece of behavior is often useful. 
When you want to test -whether a value has a real value instead of `null` or `undefined`, you -can simply compare it to `null` with the `==` (or `!=`) operator. - -(((type coercion)))(((Boolean,conversion to)))(((=== operator)))(((!== -operator)))(((comparison)))But what if you want to test whether -something refers to the precise value `false`? The rules for -converting strings and numbers to Boolean values state that `0`, -`NaN`, and the empty string (`""`) count as `false`, while all the -other values count as `true`. Because of this, expressions like `0 == -false` and `"" == false` are also true. For cases like this, where you -do _not_ want any automatic type conversions to happen, there are two -extra operators: `===` and `!==`. The first tests whether a value is -precisely equal to the other, and the second tests whether it is not -precisely equal. So `"" === false` is false as expected. - -I recommend using the three-character comparison operators defensively to -prevent unexpected type conversions from tripping you up. But when you're -certain the types on both sides will be the same, there is no problem with -using the shorter operators. - -=== Short-circuiting of logical operators === - -(((type coercion)))(((Boolean,conversion to)))(((operator)))The -logical operators `&&` and `||` handle values of different types in a -peculiar way. They will convert the value on their left side to -Boolean type in order to decide what to do, but depending on the -operator and the result of that conversion, they return either the -_original_ left-hand value or the right-hand value. - -(((|| operator)))The `||` operator, for example, will return the value -to its left when that can be converted to true and will return the -value on its right otherwise. This conversion works as you'd expect -for Boolean values and should do something analogous for values of -other types. 
- -[source,javascript] ----- -console.log(null || "user") -// → user -console.log("Karl" || "user") -// → Karl ----- - -(((default value)))This functionality allows the `||` operator to be -used as a way to fall back on a default value. If you give it an -expression that might produce an empty value on the left, the value on -the right will be used as a replacement in that case. - -(((&& operator)))The `&&` operator works similarly, but the other way -around. When the value to its left is something that converts to -false, it returns that value, and otherwise it returns the value on -its right. - -(((short-circuit evaluation)))Another important property of these two -operators is that the expression to their right is evaluated only when -necessary. In the case of `true || X`, no matter what `X` is—even if -it's an expression that does something __terrible__—the result will be -true, and `X` is never evaluated. The same goes for `false && X`, -which is false and will ignore `X`. This is called _short-circuit -evaluation_. - -(((ternary operator)))(((?: operator)))(((conditional operator)))The -conditional operator works in a similar way. The first expression is -always evaluated, but the second or third value, the one that is not -picked, is not. - -== Summary == - -We looked at four types of JavaScript values in this chapter: numbers, -strings, Booleans, and undefined values. - -Such values are created by typing in their name (`true`, `null`) or -value (`13`, `"abc"`). You can combine and transform values with -operators. We saw binary operators for arithmetic (`+`, `-`, `*`, `/`, -and `%`), string concatenation (`+`), comparison (`==`, `!=`, `===`, -`!==`, `<`, `>`, `<=`, `>=`), and logic (`&&`, `||`), as well as -several unary operators (`-` to negate a number, `!` to negate -logically, and `typeof` to find a value's type) and a ternary -operator (`?:`) to pick one of two values based on a third value. 
- -This gives you enough information to use JavaScript as a pocket -calculator, but not much more. The -link:02_program_structure.html#program_structure[next chapter] will -start tying these expressions together into basic programs. diff --git a/02_program_structure.txt b/02_program_structure.txt deleted file mode 100644 index dc6d6eea8..000000000 --- a/02_program_structure.txt +++ /dev/null @@ -1,1032 +0,0 @@ -:chap_num: 2 -:prev_link: 01_values -:next_link: 03_functions - -= Program Structure = - -[chapterquote="true"] -[quote, _why, Why's (Poignant) Guide to Ruby] -____ -And my heart glows bright red under my -filmy, translucent skin and they have to administer 10cc of JavaScript -to get me to come back. (I respond well to toxins in the blood.) Man, -that stuff will kick the peaches right out your gills! -____ - -(((why)))(((Poignant Guide)))In this chapter, we will start to do -things that can actually be called _programming_. We will expand our -command of the JavaScript language beyond the nouns and sentence -fragments we've seen so far, to the point where we can -express some meaningful prose. - -== Expressions and statements == - -(((grammar)))(((syntax)))(((code,structure -of)))(((grammar)))(((JavaScript,syntax)))In -link:01_values.html#values[Chapter 1], we made some values and then -applied operators to them to get new values. Creating values like this -is an essential part of every JavaScript program, but it is only -a part. - -(((literal expression)))A fragment of code that produces a value is -called an _((expression))_. Every value that is written literally -(such as `22` or `"psychoanalysis"`) is an expression. An expression -between ((parentheses)) is also an expression, as is a ((binary -operator)) applied to two expressions or a unary operator applied to -one. - -(((nesting,of expressions)))(((human language)))This shows part of the -beauty of a language-based interface. 
Expressions can nest in a way -very similar to the way subsentences in human languages are nested—a -subsentence can contain its own subsentences, and so on. This allows -us to combine expressions to express arbitrarily complex computations. - -(((statement)))(((semicolon)))(((program)))If an expression -corresponds to a sentence fragment, a JavaScript _statement_ -corresponds to a full sentence in a human language. A program is -simply a list of statements. - -(((syntax)))The simplest kind of statement is an expression with a -semicolon after it. This is a program: - -[source,javascript] ----- -1; -!false; ----- - -It is a useless program, though. An ((expression)) can be content to -just produce a value, which can then be used by the enclosing -expression. A ((statement)) stands on its own and amounts to something -only if it affects the world. It could display something on the -screen—that counts as changing the world—or it could change the -internal state of the machine in a way that will affect the statements -that come after it. These changes are called _((side effect))s_. The -statements in the previous example just produce the values `1` and -`true` and then immediately throw them away. This leaves no impression -on the world at all. When executing the program, nothing observable -happens. - -(((programming style)))(((automatic semicolon -insertion)))(((semicolon)))In some cases, JavaScript allows you to -omit the semicolon at the end of a statement. In other cases, it has -to be there, or the next ((line)) will be treated as part of the same -statement. The rules for when it can be safely omitted are somewhat -complex and error-prone. In this book, every statement that needs a -semicolon will always be terminated by one. I recommend you do the -same in your own programs, at least until you've learned more about -the subtleties involved in leaving out semicolons. 
- -== Variables == - -(((syntax)))(((variable,definition)))(((side effect)))(((memory)))How -does a program keep an internal ((state))? How does it remember -things? We have seen how to produce new values from old values, but -this does not change the old values, and the new value has to be -immediately used or it will dissipate again. To catch and hold values, -JavaScript provides a thing called a _variable_. - -[source,javascript] ----- -var caught = 5 * 5; ----- - -(((var keyword)))And that gives us our second kind of ((statement)). -The special word (_((keyword))_) `var` indicates that this sentence is -going to define a variable. It is followed by the name of the variable -and, if we want to immediately give it a value, by an `=` operator and -an expression. - -The previous statement creates a variable called `caught` and uses it -to grab hold of the number that is produced by multiplying 5 by 5. - -After a variable has been defined, its name can be used as an -((expression)). The value of such an expression is the value the -variable currently holds. Here's an example: - -[source,javascript] ----- -var ten = 10; -console.log(ten * ten); -// → 100 ----- - -(((underscore character)))(((dollar -sign)))(((variable,naming)))Variable names can be any word that isn't -a reserved word (such as `var`). They may not include spaces. -Digits can also be part of variable names—`catch22` is a valid name, -for example—but the name must not start with a digit. A variable name -cannot include punctuation, except for the characters `$` and `_`. - -(((= operator)))(((assignment)))(((variable,assignment)))When a -variable points at a value, that does not mean it is tied to that -value forever. The `=` operator can be used at any time on existing -variables to disconnect them from their current value and have them -point to a new one. 
- -[source,javascript] ----- -var mood = "light"; -console.log(mood); -// → light -mood = "dark"; -console.log(mood); -// → dark ----- - -(((variable,model of)))(((tentacle (analogy))))You should -imagine variables as tentacles, rather than boxes. They do not -_contain_ values; they _grasp_ them—two variables can refer to the -same value. A program can access only the values that it still has a -hold on. When you need to remember something, you grow a tentacle to -hold on to it or you reattach one of your existing tentacles to it. - -image::img/octopus.jpg[alt="Variables as tentacles"] - -Let's look at an example. To remember the number of dollars that Luigi -still owes you, you create a variable. And then when he pays back $35, -you give this variable a new value. - -[source,javascript] ----- -var luigisDebt = 140; -luigisDebt = luigisDebt - 35; -console.log(luigisDebt); -// → 105 ----- - -(((undefined)))When you define a variable without giving it a value, -the tentacle has nothing to grasp, so it ends in thin air. If you ask -for the value of an empty variable, you'll get the value `undefined`. - -(((var keyword)))A single `var` statement may define multiple -variables. The definitions must be separated by commas. - -[source,javascript] ----- -var one = 1, two = 2; -console.log(one + two); -// → 3 ----- - -== Keywords and reserved words == - -(((syntax)))(((implements (reserved word))))(((interface (reserved -word))))(((let keyword)))(((package (reserved word))))(((private -(reserved word))))(((protected (reserved word))))(((public (reserved -word))))(((static (reserved word))))(((void operator)))(((yield -(reserved word))))(((reserved word)))(((variable,naming)))Words with -a special meaning, such as `var`, are _((keyword))s_, and they may not -be used as variable names. There are also a number of words that are -“reserved for use” in ((future)) versions of JavaScript. 
These are also -officially not allowed to be used as variable names, though some -JavaScript environments do allow them. The full list of keywords and -reserved words is rather long. - -[source,text/plain] ----- -break case catch class const continue debugger -default delete do else enum export extends false -finally for function if implements import in -instanceof interface let new null package private -protected public return static super switch this -throw true try typeof var void while with yield ----- - -Don't worry about memorizing these, but remember that this might be -the problem when a variable definition does not work as expected. - -== The environment == - -(((standard environment)))The collection of variables and their values -that exist at a given time is called the _((environment))_. When a -program starts up, this environment is not empty. It always contains -variables that are part of the language ((standard)), and most of the -time, it has variables that provide ways to interact with the -surrounding system. For example, in a ((browser)), there are variables -and functions to inspect and influence the currently loaded website -and to read ((mouse)) and ((keyboard)) input. - -== Functions == - -indexsee:[application (of functions),function application] -indexsee:[invoking (of functions),function application] -indexsee:[calling (of functions),function application] -(((output)))(((function)))(((function,application)))(((alert -function)))(((message box)))A lot of the values provided in the -default environment have the type _((function))_. A function is a -piece of program wrapped in a value. Such values can be _applied_ in -order to run the wrapped program. For example, in a ((browser)) -environment, the variable `alert` holds a function that shows a little -((dialog box)) with a message. 
It is used like this: - -[source,javascript] ----- -alert("Good morning!"); ----- - -image::img/alert.png[alt="An alert dialog",width="8cm"] - -(((parameter)))(((function,application)))Executing a function is -called _invoking_, _calling_, or _applying_ it. You can call a -function by putting ((parentheses)) after an expression that produces a -function value. Usually you'll directly use the name of the variable -that holds the function. The values between the parentheses are given to -the program inside the function. In the example, the `alert` function -uses the string that we give it as the text to show in the dialog box. -Values given to functions are called _((argument))s_. The `alert` -function needs only one of them, but other functions might need a -different number or different types of arguments. - -== The console.log function == - -(((JavaScript console)))(((developer -tools)))(((Node.js)))(((console.log)))(((output)))The `alert` function -can be useful as an output device when experimenting, but clicking -away all those little windows will get on your nerves. In past -examples, we've used `console.log` to output values. Most JavaScript -systems (including all modern web ((browser))s and Node.js) provide a -`console.log` function that writes out its arguments to _some_ text -output device. In browsers, the output lands in the ((JavaScript -console)). This part of the browser interface is hidden by default, -but most browsers open it when you press F12 or, on Mac, when you -press Command-Option-I. If that does not work, search through the -menus for an item named “web console” or “developer tools”. - -ifdef::interactive_target[] - -When running the examples, or your own code, on the pages of this -book, `console.log` output will be shown after the example, instead of -in the browser's JavaScript console. 
- -endif::interactive_target[] - -[source,javascript] ----- -var x = 30; -console.log("the value of x is", x); -// → the value of x is 30 ----- - -(((object)))Though ((variable)) names cannot contain ((period -character))s, `console.log` clearly has one. This is because -`console.log` isn't a simple variable. It is actually an expression -that retrieves the `log` ((property)) from the value held by the -`console` variable. We will find out exactly what this means in -link:04_data.html#properties[Chapter 4]. - -[[return_values]] -== Return values == - -(((comparison,of numbers)))(((return value)))(((Math.max -function)))(((maximum)))Showing a dialog box or writing text to -the screen is a _((side effect))_. A lot of functions are useful -because of the side effects they produce. Functions may also produce -values, and in that case, they don't need to have a side effect to be -useful. For example, the function `Math.max` takes any number of -number values and gives back the greatest. - -[source,javascript] ----- -console.log(Math.max(2, 4)); -// → 4 ----- - -(((function,application)))(((minimum)))(((Math.min -function)))When a function produces a value, it is said to _return_ -that value. Anything that produces a value is an ((expression)) in -JavaScript, which means function calls can be used within larger -expressions. Here a call to `Math.min`, which is the opposite of -`Math.max`, is used as an input to the plus operator: - -[source,javascript] ----- -console.log(Math.min(2, 4) + 100); -// → 102 ----- - -The link:03_functions.html#functions[next chapter] explains how to -write your own functions. - -== prompt and confirm == - -(((dialog box)))(((input)))(((browser)))(((confirm function)))Browser -environments contain other functions besides `alert` for popping up -windows. You can ask the user an OK/Cancel question using -`confirm`. This returns a Boolean: `true` if the user clicks OK and -`false` if the user clicks Cancel. 
- -[source,javascript] ----- -confirm("Shall we, then?"); ----- - -image::img/confirm.png[alt="A confirm dialog",width="8cm"] - -(((input)))(((prompt function)))(((text input)))The `prompt` function -can be used to ask an “open” question. The first argument is the -question, the second one is the text that the user starts with. A line -of text can be typed into the dialog window, and the function will -return this text as a string. - -[source,javascript] ----- -prompt("Tell me everything you know.", "..."); ----- - -image::img/prompt.png[alt="A prompt dialog",width="8cm"] - -These two functions aren't used much in modern web programming, mostly -because you have no control over the way the resulting windows look, -but they are useful for toy programs and experiments. - -== Control flow == - -(((execution order)))(((program)))(((control flow)))When your program -contains more than one ((statement)), the statements are executed, -predictably, from top to bottom. As a basic example, this program has -two statements. The first one asks the user for a number, and the -second, which is executed afterward, shows the ((square)) of that -number. - -[source,javascript] ----- -var theNumber = Number(prompt("Pick a number", "")); -alert("Your number is the square root of " + - theNumber * theNumber); ----- - -(((number,conversion to)))(((type coercion)))(((Number -function)))(((String function)))(((Boolean -function)))(((Boolean,conversion to)))The function `Number` converts a -value to a number. We need that conversion because the result of -`prompt` is a string value, and we want a number. There are similar -functions called `String` and `Boolean` that convert values to those -types. 
- -Here is the rather trivial schematic representation of straight -control flow: - -image::img/controlflow-straight.svg[alt="Trivial control flow",width="4cm"] - -== Conditional execution == - -(((Boolean)))(((control flow)))Executing statements in straight-line -order isn't the only option we have. An alternative is _((conditional -execution))_, where we choose between two different routes based on a -Boolean value, like this: - -image::img/controlflow-if.svg[alt="Conditional control flow",width="4cm"] - -(((syntax)))(((Number function)))(((if keyword)))Conditional execution -is written with the `if` keyword in JavaScript. In the simple case, we -just want some code to be executed if, and only if, a certain -condition holds. For example, in the previous program, we might want -to show the square of the input only if the input is actually a -number. - -[source,javascript] ----- -var theNumber = Number(prompt("Pick a number", "")); -if (!isNaN(theNumber)) - alert("Your number is the square root of " + - theNumber * theNumber); ----- - -With this modification, if you enter “cheese”, no output will be shown. - -The keyword `if` executes or skips a statement depending on the value -of a Boolean expression. The deciding expression is written after the -keyword, between ((parentheses)), followed by the statement to execute. - -(((isNaN function)))The `isNaN` function is a standard JavaScript -function that returns `true` only if the argument it is given is -`NaN`. The `Number` function happens to return `NaN` when you give it -a string that doesn't represent a valid number. Thus, the condition -translates to “unless `theNumber` is not-a-number, do this”. - -(((else keyword)))You often won't just have code that executes when a -condition holds true, but also code that handles the other case. This -alternate path is represented by the second arrow in the -diagram. The `else` keyword can be used, together with `if`, to create -two separate, alternative execution paths. 
- -[source,javascript] ----- -var theNumber = Number(prompt("Pick a number", "")); -if (!isNaN(theNumber)) - alert("Your number is the square root of " + - theNumber * theNumber); -else - alert("Hey. Why didn't you give me a number?"); ----- - -(((if keyword,chaining)))If we have more than two paths to choose -from, multiple `if`/`else` pairs can be “chained” together. Here's an -example: - -[source,javascript] ----- -var num = Number(prompt("Pick a number", "0")); - -if (num < 10) - alert("Small"); -else if (num < 100) - alert("Medium"); -else - alert("Large"); ----- - -The program will first check whether `num` is less than 10. If it is, -it chooses that branch, shows `"Small"`, and is done. If it isn't, it -takes the `else` branch, which itself contains a second `if`. If the -second condition (`< 100`) holds, that means the number is between 10 -and 100, and `"Medium"` is shown. If it doesn't, the second, and last, -`else` branch is chosen. - -The flow chart for this program looks something like this: - -image::img/controlflow-nested-if.svg[alt="Nested if control flow",width="4cm"] - -[[loops]] -== while and do loops == - -(((even number)))Consider a program that prints all even numbers from -0 to 12. One way to write this is as follows: - -[source,javascript] ----- -console.log(0); -console.log(2); -console.log(4); -console.log(6); -console.log(8); -console.log(10); -console.log(12); ----- - -(((control flow)))That works, but the idea of writing a program is to -make something _less_ work, not more. If we needed all even numbers -less than 1,000, the previous would be unworkable. What we need is a -way to repeat some code. This form of control flow is called a -_((loop))_: - -image::img/controlflow-loop.svg[alt="Loop control flow",width="4cm"] - -(((syntax)))(((counter variable)))Looping control flow allows us to go -back to some point in the program where we were before and repeat it -with our current program state. 
If we combine this with a variable -that counts, we can do something like this: - -[source,javascript] ----- -var number = 0; -while (number <= 12) { - console.log(number); - number = number + 2; -} -// → 0 -// → 2 -// … etcetera ----- - -(((while loop)))(((Boolean)))A ((statement)) starting with the -keyword `while` creates a loop. The word `while` is followed by an -((expression)) in ((parentheses)) and then a statement, much like `if`. -The loop executes that statement as long as the expression produces a -value that is `true` when converted to Boolean type. - -(((grouping)))((({} (block))))(((block)))In this loop, we want to both -print the current number and add two to our variable. Whenever we need -to execute multiple ((statement))s inside a loop, we wrap them in -((curly braces)) (`{` and `}`). Braces do for statements what -((parentheses)) do for expressions: they group them together, making -them count as a single statement. A sequence of statements wrapped in -braces is called a _block_. - -(((programming style)))Many JavaScript programmers wrap every single -loop or `if` body in braces. They do this both for the sake of -consistency and to avoid having to add or remove braces when changing -the number of statements in the body later. In this book, I will write -most single-statement bodies without braces, since I value brevity. -You are free to go with whichever style you prefer. - -(((comparison)))(((state)))The variable `number` demonstrates the way -a ((variable)) can track the progress of a program. Every time the -loop repeats, `number` is incremented by `2`. Then, at the beginning -of every repetition, it is compared with the number `12` to decide -whether the program has done all the work it intended to do. - -(((exponentiation)))As an example that actually does something useful, -we can now write a program that calculates and shows the value of -2^10^ (2 to the 10th power). 
We use two variables: one to keep -track of our result and one to count how often we have multiplied this -result by 2. The loop tests whether the second variable has reached 10 -yet and then updates both variables. - -[source,javascript] ----- -var result = 1; -var counter = 0; -while (counter < 10) { - result = result * 2; - counter = counter + 1; -} -console.log(result); -// → 1024 ----- - -The counter could also start at `1` and check for `<= 10`, but, for -reasons that will become apparent in -link:04_data.html#array_indexing[Chapter 4], it is a good idea to get -used to counting from 0. - -(((loop body)))(((do loop)))(((control flow)))The `do` loop is a -control structure similar to the `while` loop. It differs only on one -point: a `do` loop always executes its body at least once, and it -starts testing whether it should stop only after that first execution. -To reflect this, the test appears after the body of the loop: - -[source,javascript] ----- -do { - var yourName = prompt("Who are you?"); -} while (!yourName); -console.log(yourName); ----- - -(((Boolean,conversion to)))(((! operator)))This program will -force you to enter a name. It will ask again and again until it gets -something that is not an empty string. Applying the `!` operator will -convert a value to Boolean type before negating it, and all strings -except `""` convert to `true`. This means the loop continues going round -until you provide a name that is not the empty string. - -== Indenting Code == - -(((block)))(((code structure)))(((whitespace)))(((programming -style)))You've probably noticed the spaces I put in front of some -statements. In JavaScript, these are not required—the computer will -accept the program just fine without them. In fact, even the ((line)) -breaks in programs are optional. You could write a program as a single -long line if you felt like it. The role of the ((indentation)) inside -blocks is to make the structure of the code stand out. 
In complex -code, where new blocks are opened inside other blocks, it can become -hard to see where one block ends and another begins. With proper -indentation, the visual shape of a program corresponds to the shape of -the blocks inside it. I like to use two spaces for every open block, -but tastes differ—some people use four spaces, and some people use -((tab character))s. - -== for loops == - -(((syntax)))(((while loop)))(((counter variable)))Many loops follow -the pattern seen in the previous `while` examples. First, a “counter” -variable is created to track the progress of the loop. Then comes a -`while` loop, whose test expression usually checks whether the counter -has reached some boundary yet. At the end of the loop body, the -counter is updated to track progress. - -(((for loop)))(((loop)))Because this pattern is so common, JavaScript and -similar languages provide a slightly shorter and more comprehensive -form, the `for` loop. - -[source,javascript] ----- -for (var number = 0; number <= 12; number = number + 2) - console.log(number); -// → 0 -// → 2 -// … etcetera ----- - -(((control flow)))(((state)))This program is exactly equivalent to the -link:02_program_structure.html#loops[earlier] even-number-printing -example. The only change is that all the ((statement))s that are -related to the “state” of the loop are now grouped together. - -The ((parentheses)) after a `for` keyword must contain two -((semicolon))s. The part before the first semicolon _initializes_ the -loop, usually by defining a ((variable)). The second part is the -((expression)) that _checks_ whether the loop must continue. The final -part _updates_ the state of the loop after every iteration. In most -cases, this is shorter and clearer than a `while` construct. 
- -(((exponentiation)))Here is the code that computes 2^10^, using `for` -instead of `while`: - -[source,javascript] ----- -var result = 1; -for (var counter = 0; counter < 10; counter = counter + 1) - result = result * 2; -console.log(result); -// → 1024 ----- - -(((programming style)))(((indentation)))Note that even though no block -is opened with a `{`, the statement in the loop is still indented two -spaces to make it clear that it “belongs” to the line before it. - -== Breaking Out of a Loop == - -(((loop,termination of)))(((break keyword)))Having the loop's -condition produce `false` is not the only way a loop can finish. There -is a special statement called `break` that has the effect of -immediately jumping out of the enclosing loop. - -This program illustrates the `break` statement. It finds the first number -that is both greater than or equal to 20 and divisible by 7. - -[source,javascript] ----- -for (var current = 20; ; current++) { - if (current % 7 == 0) - break; -} -console.log(current); -// → 21 ----- - -(((remainder operator)))(((% operator)))Using the remainder -(`%`) operator is an easy way to test whether a number is divisible by -another number. If it is, the remainder of their division is zero. - -(((for loop)))The `for` construct in the example does not have a part -that checks for the end of the loop. This means that the loop will -never stop unless the `break` statement inside is executed. - -If you were to leave out that `break` statement or accidentally write -a condition that always produces `true`, your program would get stuck -in an _((infinite loop))_. A program stuck in an infinite loop will -never finish running, which is usually a bad thing. - -ifdef::interactive_target[] - -If you create an infinite loop in one of the examples on these pages, -you'll usually be asked whether you want to stop the script after a -few seconds. 
If that fails, you will have to close the tab that you're -working in, or on some browsers close your whole browser, in order to -recover. - -endif::interactive_target[] - -(((continue keyword)))The `continue` keyword is similar to `break`, in -that it influences the progress of a loop. When `continue` is -encountered in a loop body, control jumps out of the body and -continues with the loop's next iteration. - -== Updating variables succinctly == - -(((assignment)))(((+= operator)))(((-= operator)))(((/= -operator)))(((*= operator)))(((state)))(((side effect)))Especially -when looping, a program often needs to “update” a variable to hold a -value based on that variable's previous value. - -// test: no - -[source,javascript] ----- -counter = counter + 1; ----- - -JavaScript provides a shortcut for this: - -// test: no - -[source,javascript] ----- -counter += 1; ----- - -Similar shortcuts work for many other operators, such as `result *= 2` to -double `result` or `counter -= 1` to count downward. - -This allows us to shorten our counting example a little more. - -[source,javascript] ----- -for (var number = 0; number <= 12; number += 2) - console.log(number); ----- - -(((++ operator)))(((-- operator)))For `counter += 1` and `counter -= -1`, there are even shorter equivalents: `counter++` and `counter--`. - -== Dispatching on a value with switch == - -(((syntax)))(((conditional execution)))(((dispatching)))(((if -keyword,chaining)))It is common for code to look like this: - -// test: no - -[source,javascript] ----- -if (variable == "value1") action1(); -else if (variable == "value2") action2(); -else if (variable == "value3") action3(); -else defaultAction(); ----- - -(((colon character)))(((switch keyword)))There is a construct called -`switch` that is intended to solve such a “dispatch” in a more direct -way. 
Unfortunately, the syntax JavaScript uses for this (which it -inherited from the C/Java line of programming languages) is somewhat -awkward—a chain of `if` statements often looks better. Here is an -example: - -[source,javascript] ----- -switch (prompt("What is the weather like?")) { - case "rainy": - console.log("Remember to bring an umbrella."); - break; - case "sunny": - console.log("Dress lightly."); - case "cloudy": - console.log("Go outside."); - break; - default: - console.log("Unknown weather type!"); - break; -} ----- - -(((fallthrough)))(((comparison)))(((break keyword)))(((case -keyword)))(((default keyword)))You may put any number of `case` labels -inside the block opened by `switch`. The program will jump to the -label that corresponds to the value that `switch` was given or to -`default` if no matching value is found. It starts executing -statements there, even if they're under another label, until it -reaches a `break` statement. In some cases, such as the `"sunny"` case -in the example, this can be used to share some code between cases (it -recommends going outside for both sunny and cloudy weather). But -beware: it is easy to forget such a `break`, which will cause the -program to execute code you do not want executed. - -== Capitalization == - -(((capitalization)))(((variable,naming)))(((whitespace)))Variable -names may not contain spaces, yet it is often helpful to use multiple -words to clearly describe what the variable represents. These are -pretty much your choices for writing a variable name with several -words in it: - ----- -fuzzylittleturtle -fuzzy_little_turtle -FuzzyLittleTurtle -fuzzyLittleTurtle ----- - -(((camel case)))(((programming style)))(((underscore character)))The -first style can be hard to read. Personally, I like the look of the -underscores, though that style is a little painful to type. 
The -((standard)) JavaScript functions, and most JavaScript programmers, -follow the bottom style—they capitalize every word except the first. -It is not hard to get used to little things like that, and code with -mixed naming styles can be jarring to read, so we will just follow -this ((convention)). - -(((Number function)))(((constructor)))In a few cases, such as the -`Number` function, the first letter of a variable is also capitalized. -This was done to mark this function as a constructor. What a -constructor is will become clear in -link:06_object.html#constructors[Chapter 6]. For now, the important -thing is not to be bothered by this apparent lack of ((consistency)). - -== Comments == - -(((readability)))Often, raw code does not convey all the information -you want a program to convey to human readers, or it conveys it in -such a cryptic way that people might not understand it. At other -times, you might just feel poetic or want to include some thoughts as -part of your program. This is what _((comment))s_ are for. - -(((slash character)))(((line comment)))A comment is a piece of text -that is part of a program but is completely ignored by the computer. -JavaScript has two ways of writing comments. To write a single-line -comment, you can use two slash characters (`//`) and then the comment -text after it. - -// test: no - -[source,javascript] ----- -var accountBalance = calculateBalance(account); -// It's a green hollow where a river sings -accountBalance.adjust(); -// Madly catching white tatters in the grass. -var report = new Report(); -// Where the sun on the proud mountain rings: -addToReport(accountBalance, report); -// It's a little valley, foaming like light in a glass. ----- - -(((block comment)))A `//` comment goes only to the end of the line. A -section of text between `/*` and `*/` will be ignored, regardless of -whether it contains line breaks. This is often useful for adding -blocks of information about a file or a chunk of program. 
- -[source,javascript] ----- -/* - I first found this number scrawled on the back of one of - my notebooks a few years ago. Since then, it has often - dropped by, showing up in phone numbers and the serial - numbers of products that I've bought. It obviously likes - me, so I've decided to keep it. -*/ -var myNumber = 11213; ----- - -== Summary == - -You now know that a program is built out of statements, which -themselves sometimes contain more statements. Statements tend to -contain expressions, which themselves can be built out of smaller -expressions. - -Putting statements after one another gives you a program that is -executed from top to bottom. You can introduce disturbances in the -flow of control by using conditional (`if`, `else`, and `switch`) and -looping (`while`, `do`, and `for`) statements. - -Variables can be used to file pieces of data under a name, and they -are useful for tracking state in your program. The environment is the -set of variables that are defined. JavaScript systems -always put a number of useful standard variables into your -environment. - -Functions are special values that encapsulate a piece of program. You -can invoke them by writing `functionName(argument1, argument2)`. Such -a function call is an expression, and may produce a value. - -== Exercises == - -(((exercises)))If you are unsure how to try your solutions to -exercises, refer to the link:00_intro.html#intro[introduction]. - -Each exercise starts with a problem description. Read that and try to -solve the exercise. If you run into problems, consider reading the -hints (!interactive after the exercise!)(!book at the link:hints.html#hints[end of the book]!). -Full solutions to the exercises are not included in this -book, but you can find them online at -http://eloquentjavascript.net/code[_eloquentjavascript.net/code_]. 
-If you want to learn something from the exercises, I recommend looking -at the solutions only after you've solved the exercise, or at least -after you've attacked it long and hard enough to have a slight -headache. - -=== Looping a triangle === - -(((triangle (exercise))))Write a ((loop)) that makes seven calls to -`console.log` to output the following triangle: - ----- -# -## -### -#### -##### -###### -####### ----- - -It may be useful to know that you can find the length of a string by -writing `.length` after it. - -[source,javascript] ----- -var abc = "abc"; -console.log(abc.length); -// → 3 ----- - -ifdef::interactive_target[] - -Most exercises contain a piece of code that you can modify to solve -the exercise. Remember that you can click code blocks to edit them. - -[source,javascript] ----- -// Your code here. ----- -endif::interactive_target[] - -!!hint!! - -(((triangle (exercise))))You can start with a program that simply -prints out the numbers 1 to 7, which you can derive by making a few -modifications to the -link:02_program_structure.html#loops[even number printing example] -given earlier in the chapter, where the `for` loop was introduced. - -Now consider the equivalence between numbers and strings of hash -characters. You can go from 1 to 2 by adding 1 (`+= 1`). You can go -from `"#"` to `"##"` by adding a character (`+= "#"`). Thus, your -solution can closely follow the number-printing program. - -!!hint!! - -=== FizzBuzz === - -(((FizzBuzz (exercise))))(((loop)))(((conditional execution)))Write a -program that uses `console.log` to print all the numbers from 1 to -100, with two exceptions. For numbers divisible by 3, print `"Fizz"` -instead of the number, and for numbers divisible by 5 (and not 3), -print `"Buzz"` instead. - -When you have that working, modify your program to print `"FizzBuzz"`, -for numbers that are divisible by both 3 and 5 (and still print -`"Fizz"` or `"Buzz"` for numbers divisible by only one of those). 
- -(This is actually an ((interview question)) that has been claimed to -weed out a significant percentage of programmer candidates. So if you -solved it, you're now allowed to feel good about yourself.) - -ifdef::interactive_target[] -[source,javascript] ----- -// Your code here. ----- -endif::interactive_target[] - -!!hint!! - -(((FizzBuzz (exercise))))(((remainder operator)))(((% operator)))Going -over the numbers is clearly a looping job, and selecting what to print -is a matter of conditional execution. Remember the trick of using the -remainder (`%`) operator for checking whether a number is divisible by -another number (has a remainder of zero). - -In the first version, there are three possible outcomes for every -number, so you'll have to create an `if`/`else if`/`else` chain. - -(((|| operator)))(((if keyword,chaining)))The second version of the -program has a straightforward solution and a clever one. The simple -way is to add another “branch” to precisely test the given condition. -For the clever method, build up a string containing the word or words -to output, and print either this word or the number if there is no -word, potentially by making elegant use of the `||` operator. - -!!hint!! - -=== Chess board === - -(((chess board (exercise))))(((loop)))(((nesting,of loops)))(((newline -character)))Write a program that creates a string that represents an -8×8 grid, using newline characters to separate lines. At each position -of the grid there is either a space or a “#” character. The characters -should form a chess board. - -Passing this string to `console.log` should show something like this: - ----- - # # # # -# # # # - # # # # -# # # # - # # # # -# # # # - # # # # -# # # # ----- - -When you have a program that generates this pattern, define a -((variable)) `size = 8` and change the program so that it works for -any `size`, outputting a grid of the given width and height. - -ifdef::interactive_target[] -[source,javascript] ----- -// Your code here. 
----- -endif::interactive_target[] - -!!hint!! - -(((chess board (exercise))))The string can be built by starting with -an empty one (`""`) and repeatedly adding characters. A newline -character is written `"\n"`. - -Use `console.log` to inspect the output of your program. - -(((nesting,of loops)))To work with two ((dimensions)), you will need a -((loop)) inside of a loop. Put ((curly braces)) around the bodies of -both loops to make it easy to see where they start and end. Try to -properly indent these bodies. The order of the loops must follow the -order in which we build up the string (line by line, left to right, -top to bottom). So the outer loop handles the lines and the inner loop -handles the characters on a line. - -(((counter variable)))(((remainder operator)))(((% operator)))You'll -need two variables to track your progress. To know whether to put a -space or a hash sign at a given position, you could test whether the -sum of the two counters is even (`% 2`). - -Terminating a line by adding a newline character happens after the -line has been built up, so do this after the inner loop but inside of -the outer loop. - -!!hint!! diff --git a/03_functions.txt b/03_functions.txt deleted file mode 100644 index 5c28d56e3..000000000 --- a/03_functions.txt +++ /dev/null @@ -1,1033 +0,0 @@ -:chap_num: 3 -:prev_link: 02_program_structure -:next_link: 04_data - -= Functions = - -[chapterquote="true"] -[quote, Donald Knuth] -____ -People think that computer science is the art of -geniuses but the actual reality is the opposite, just many people -doing things that build on each other, like a wall of mini stones. -____ - -(((Knuth+++,+++ Donald)))(((function)))(((code structure)))You've seen function values, such -as `alert`, and how to call them. Functions are the bread and butter -of JavaScript programming. The concept of wrapping a piece of program -in a value has many uses. 
It is a tool to structure larger programs, -to reduce repetition, to associate names with subprograms, and to -isolate these subprograms from each other. - -(((human language)))The most obvious application of functions is -defining new ((vocabulary)). Creating new words in regular, -human-language prose is usually bad style. But in programming, it is -indispensable. - -(((abstraction)))Typical adult English speakers have some 20,000 words -in their vocabulary. Few programming languages come with 20,000 -commands built in. And the vocabulary that _is_ available tends to be -more precisely defined, and thus less flexible, than in human -language. Therefore, we usually _have_ to add some of our own -vocabulary to avoid repeating ourselves too much. - -== Defining a function == - -(((square)))(((function,definition)))A function definition is just a -regular ((variable)) definition where the value given to the variable -happens to be a function. For example, the following code defines the -variable `square` to refer to a function that produces the square of a -given number: - -[source,javascript] ----- -var square = function(x) { - return x * x; -}; - -console.log(square(12)); -// → 144 ----- - -indexsee:[braces,curly braces] -(((curly braces)))(((block)))(((syntax)))(((function -keyword)))(((function,body)))(((function,as value)))A function is -created by an expression that starts with the keyword `function`. -Functions have a set of _((parameter))s_ (in this case, only `x`) and -a _body_, which contains the statements that are to be executed when -the function is called. The function body must always be wrapped in -braces, even when it consists of only a single ((statement)) (as -in the previous example). - -(((power example)))A function can have multiple parameters or no -parameters at all. 
In the following example, `makeNoise` does not list -any parameter names, whereas `power` lists two: - -[source,javascript] ----- -var makeNoise = function() { - console.log("Pling!"); -}; - -makeNoise(); -// → Pling! - -var power = function(base, exponent) { - var result = 1; - for (var count = 0; count < exponent; count++) - result *= base; - return result; -}; - -console.log(power(2, 10)); -// → 1024 ----- - -(((return value)))(((return keyword)))(((undefined)))Some functions -produce a value, such as `power` and `square`, and some don't, such as -`makeNoise`, which produces only a ((side effect)). A `return` -statement determines the value the function returns. When control -comes across such a statement, it immediately jumps out of the current -function and gives the returned value to the code that called the -function. The `return` keyword without an expression after it will -cause the function to return `undefined`. - -== Parameters and scopes == - -(((function,application)))(((variable,from parameter)))The -((parameter))s to a function behave like regular variables, but their -initial values are given by the _caller_ of the function, not the code -in the function itself. - -(((function,scope)))(((scope)))(((local variable)))An -important property of functions is that the variables created inside -of them, including their parameters, are _local_ to the function. This -means, for example, that the `result` variable in the `power` example -will be newly created every time the function is called, and these -separate incarnations do not interfere with each other. - -indexsee:[top-level scope,global scope] -(((var keyword)))(((global scope)))(((variable,global)))This -“localness” of variables applies only to the parameters and to variables -declared with the `var` keyword inside the function body. Variables -declared outside of any function are called _global_, because they are -visible throughout the program. 
It is possible to access such -variables from inside a function, as long as you haven't declared a -local variable with the same name. - -(((variable,assignment)))The following code demonstrates this. It -defines and calls two functions that both assign a value to the -variable `x`. The first one declares the variable as local and thus -changes only the local variable. The second does not declare `x` -locally, so references to `x` inside of it refer to the global -variable `x` defined at the top of the example. - -[source,javascript] ----- -var x = "outside"; - -var f1 = function() { - var x = "inside f1"; -}; -f1(); -console.log(x); -// → outside - -var f2 = function() { - x = "inside f2"; -}; -f2(); -console.log(x); -// → inside f2 ----- - -(((variable,naming)))(((scope)))(((global scope)))(((code,structure -of)))This behavior helps prevent accidental interference between -functions. If all variables were shared by the whole program, it'd -take a lot of effort to make sure no name is ever used for two -different purposes. And if you _did_ reuse a variable name, you might -see strange effects from unrelated code messing with the value of your -variable. By treating function-local variables as existing only within -the function, the language makes it possible to read and understand -functions as small universes, without having to worry about all the -code at once. - -[[scoping]] -== Nested scope == - -(((nesting,of functions)))(((nesting,of -scope)))(((scope)))(((inner function)))(((lexical -scoping)))JavaScript distinguishes not just between _global_ and -_local_ variables. Functions can be created inside other functions, -producing several degrees of locality. 
- -(((landscape example)))For example, this rather nonsensical function -has two functions inside of it: - -[source,javascript] ----- -var landscape = function() { - var result = ""; - var flat = function(size) { - for (var count = 0; count < size; count++) - result += "_"; - }; - var mountain = function(size) { - result += "/"; - for (var count = 0; count < size; count++) - result += "'"; - result += "\\"; - }; - - flat(3); - mountain(4); - flat(6); - mountain(1); - flat(1); - return result; -}; - -console.log(landscape()); -// → ___/''''\______/'\_ ----- - -(((function,scope)))(((scope)))The `flat` and `mountain` functions -can “see” the variable called `result`, since they are inside the -function that defines it. But they cannot see each other's `count` -variables since they are outside each other's scope. The environment -outside of the `landscape` function doesn't see any of the variables -defined inside `landscape`. - -In short, each local scope can also see all the local scopes that -contain it. The set of variables visible inside a function is -determined by the place of that function in the program text. All -variables from blocks _around_ a function's definition are -visible—meaning both those in function bodies that enclose it and -those at the top level of the program. This approach to variable -visibility is called _((lexical scoping))_. - -((({} (block))))People who have experience with other programming -languages might expect that any block of code between braces produces -a new local environment. But in JavaScript, functions are the only -things that create a new scope. You are allowed to use free-standing -blocks. - -[source,javascript] ----- -var something = 1; -{ - var something = 2; - // Do stuff with variable something... -} -// Outside of the block again... ----- - -But the `something` inside the block refers to the same variable as -the one outside the block. 
In fact, although blocks like this are -allowed, they are useful only to group the body of an `if` statement -or a loop. - -(((let keyword)))(((ECMAScript 6)))If you find this odd, you're not -alone. The next version of JavaScript will introduce a `let` keyword, -which works like `var` but creates a variable that is local to the -enclosing _block_, not the enclosing _function_. - -== Functions as values == - -(((function,as value)))Function ((variable))s usually simply act as -names for a specific piece of the program. Such a variable is defined -once and never changed. This makes it easy to start confusing the -function and its name. - -(((variable,assignment)))But the two are different. A function value -can do all the things that other values can do—you can use it in -arbitrary ((expression))s, not just call it. It is possible to store a -function value in a new place, pass it as an argument to a function, -and so on. Similarly, a variable that holds a function is still just a -regular variable and can be assigned a new value, like so: - -// test: no - -[source,javascript] ----- -var launchMissiles = function(value) { - missileSystem.launch("now"); -}; -if (safeMode) - launchMissiles = function(value) {/* do nothing */}; ----- - -(((function,higher-order)))In -link:05_higher_order.html#higher_order[Chapter 5], we will discuss the -wonderful things that can be done by passing around function values to -other functions. - -== Declaration notation == - -(((syntax)))(((square example)))(((function -keyword)))(((function,definition)))(((function,declaration)))There is -a slightly shorter way to say “++var square = function…++”. The -`function` keyword can also be used at the start of a statement, as in -the following: - -[source,javascript] ----- -function square(x) { - return x * x; -} ----- - -(((future)))(((execution order)))This is a function _declaration_. The -statement defines the variable `square` and points it at the given -function. So far so good. 
There is one subtlety with this form of -function definition, however. - -[source,javascript] ----- -console.log("The future says:", future()); - -function future() { - return "We STILL have no flying cars."; -} ----- - -This code works, even though the function is defined _below_ the code -that uses it. This is because function declarations are not part of -the regular top-to-bottom flow of control. They are conceptually moved -to the top of their scope and can be used by all the code in that -scope. This is sometimes useful because it gives us the freedom to -order code in a way that seems meaningful, without worrying about -having to define all functions above their first use. - -(((function,declaration)))What happens when you put such a function -definition inside a conditional (`if`) block or a loop? Well, don't do -that. Different JavaScript platforms in different browsers have -traditionally done different things in that situation, and the latest -((standard)) actually forbids it. If you want your programs to behave -consistently, only use this form of function-defining statements in -the outermost block of a function or program. - -[source,javascript] ----- -function example() { - function a() {} // Okay - if (something) { - function b() {} // Danger! - } -} ----- - -[[stack]] -== The call stack == - -indexsee:[stack,call stack] -(((call stack)))(((function,application)))It will be helpful to take a -closer look at the way control flows through functions. Here is a -simple program that makes a few function calls: - -[source,javascript] ----- -function greet(who) { - console.log("Hello " + who); -} -greet("Harry"); -console.log("Bye"); ----- - -(((control flow)))(((execution order)))(((console.log)))A run through -this program goes roughly like this: the call to `greet` causes -control to jump to the start of that function (line 2). It calls -`console.log` (a built-in browser function), which takes control, does -its job, and then returns control to line 2. 
Then it reaches the end -of the `greet` function, so it returns to the place that called it, at -line 4. The line after that calls `console.log` again. - -We could show the flow of control schematically like this: - ----- -top - greet - console.log - greet -top - console.log -top ----- - -(((return keyword)))(((memory)))Because a function has to jump back to -the place of the call when it returns, the computer must remember the -context from which the function was called. In one case, `console.log` -has to jump back to the `greet` function. In the other case, it jumps -back to the end of the program. - -The place where the computer stores this context is the _((call -stack))_. Every time a function is called, the current context is put -on top of this “stack”. When the function returns, it removes the top -context from the stack and uses it to continue execution. - -(((infinite loop)))(((stack overflow)))(((recursion)))Storing this -stack requires space in the computer's memory. When the stack grows -too big, the computer will fail with a message like “out of stack -space” or “too much recursion”. The following code illustrates this by -asking the computer a really hard question, which causes an infinite -back-and-forth between two functions. Rather, it _would_ be infinite, -if the computer had an infinite stack. As it is, we will run out of -space, or “blow the stack”. - -// test: no - -[source,javascript] ----- -function chicken() { - return egg(); -} -function egg() { - return chicken(); -} -console.log(chicken() + " came first."); -// → ?? ----- - -== Optional Arguments == - -(((argument)))(((function,application)))The following code is allowed -and executes without any problem: - -[source,javascript] ----- -alert("Hello", "Good Evening", "How do you do?"); ----- - -(((alert function)))The function `alert` officially accepts only one -argument. Yet when you call it like this, it doesn't complain. It -simply ignores the other arguments and shows you “Hello”. 
- -(((undefined)))(((parameter)))JavaScript is extremely broad-minded -about the number of arguments you pass to a function. If you pass too -many, the extra ones are ignored. If you pass too few, the missing -parameters simply get assigned the value `undefined`. - -The downside of this is that it is possible—likely, even—that you'll -accidentally pass the wrong number of arguments to functions and no -one will tell you about it. - -[[power]] -(((power example)))(((optional argument)))The -upside is that this behavior can be used to have a function take -“optional” arguments. For example, the following version of `power` -can be called either with two arguments or with a single argument, in -which case the exponent is assumed to be two, and the function behaves -like `square`. - -// test: wrap - -[source,javascript] ----- -function power(base, exponent) { - if (exponent == undefined) - exponent = 2; - var result = 1; - for (var count = 0; count < exponent; count++) - result *= base; - return result; -} - -console.log(power(4)); -// → 16 -console.log(power(4, 3)); -// → 64 ----- - -(((console.log)))In the link:04_data.html#arguments_object[next -chapter], we will see a way in which a function body can get at the -exact list of arguments that were passed. This is helpful because it -makes it possible for a function to accept any number of arguments. -For example, `console.log` makes use of this—it outputs all of the -values it is given. - -[source,javascript] ----- -console.log("R", 2, "D", 2); -// → R 2 D 2 ----- - -== Closure == - -(((call stack)))(((local variable)))(((function,as -value)))(((closure)))(((scope)))The ability to treat functions as -values, combined with the fact that local variables are “re-created” -every time a function is called, brings up an interesting question. -What happens to local variables when the function call that created -them is no longer active? - -The following code shows an example of this. 
It defines a function, -`wrapValue`, which creates a local variable. It then returns a function -that accesses and returns this local variable. - -[source,javascript] ----- -function wrapValue(n) { - var localVariable = n; - return function() { return localVariable; }; -} - -var wrap1 = wrapValue(1); -var wrap2 = wrapValue(2); -console.log(wrap1()); -// → 1 -console.log(wrap2()); -// → 2 ----- - -This is allowed and works as you'd hope—the variable can still be -accessed. In fact, multiple instances of the variable can be alive at -the same time, which is another good illustration of the concept that -local variables really are re-created for every call—different calls -can't trample on one another's local variables. - -This feature—being able to reference a specific instance of local -variables in an enclosing function—is called _closure_. A function -that “closes over” some local variables is called _a_ closure. This -behavior not only frees you from having to worry about lifetimes of -variables but also allows for some creative use of function values. - -(((multiplier function)))With a slight change, we can turn the -previous example into a way to create functions that multiply by an -arbitrary amount. - -[source,javascript] ----- -function multiplier(factor) { - return function(number) { - return number * factor; - }; -} - -var twice = multiplier(2); -console.log(twice(5)); -// → 10 ----- - -(((variable,from parameter)))The explicit `localVariable` from the -`wrapValue` example isn't needed since a parameter is itself a local -variable. - -(((function,model of)))Thinking about programs like this takes some -practice. A good mental model is to think of the `function` keyword as -“freezing” the code in its body and wrapping it into a package (the -function value). So when you read `return function(...) {...}`, think -of it as returning a handle to a piece of computation, frozen for -later use. 
- -In the example, `multiplier` returns a frozen chunk of code that gets -stored in the `twice` variable. The last line then calls the value in -this variable, causing the frozen code (`return number * factor;`) to -be activated. It still has access to the `factor` variable from the -`multiplier` call that created it, and in addition it gets access to -the argument passed when unfreezing it, 5, through its `number` -parameter. - -== Recursion == - -(((power example)))(((stack -overflow)))(((recursion)))(((function,application)))It is perfectly -okay for a function to call itself, as long as it takes care not to -overflow the stack. A function that calls itself is called -_recursive_. Recursion allows some functions to be written in a -different style. Take, for example, this alternative implementation of -`power`: - -// test: wrap - -[source,javascript] ----- -function power(base, exponent) { - if (exponent == 0) - return 1; - else - return base * power(base, exponent - 1); -} - -console.log(power(2, 3)); -// → 8 ----- - -(((loop)))(((readability)))(((mathematics)))This is rather -close to the way mathematicians define exponentiation and arguably -describes the concept in a more elegant way than the looping variant -does. The function calls itself multiple times with different -arguments to achieve the repeated multiplication. - -(((function,application)))(((efficiency)))But this implementation has -one important problem: in typical JavaScript implementations, it's -about 10 times slower than the looping version. Running through a -simple loop is a lot cheaper than calling a function multiple times. - -(((optimization)))The dilemma of speed versus ((elegance)) is an -interesting one. You can see it as a kind of continuum between -human-friendliness and machine-friendliness. Almost any program can be -made faster by making it bigger and more convoluted. The programmer -must decide on an appropriate balance. 
- -In the case of the link:03_functions.html#power[earlier] `power` -function, the inelegant (looping) version is still fairly simple and -easy to read. It doesn't make much sense to replace it with the -recursive version. Often, though, a program deals with such complex -concepts that giving up some efficiency in order to make the program -more straightforward becomes an attractive choice. - -(((profiling)))The basic rule, which has been repeated by many -programmers and with which I wholeheartedly agree, is to not worry -about efficiency until you know for sure that the program is too slow. -If it is, find out which parts are taking up the most time, and start -exchanging elegance for efficiency in those parts. - -Of course, this rule doesn't mean one should start ignoring -performance altogether. In many cases, like the `power` function, not -much simplicity is gained from the “elegant” approach. And sometimes -an experienced programmer can see right away that a simple approach is -never going to be fast enough. - -(((premature optimization)))The reason I'm stressing this is that -surprisingly many beginning programmers focus fanatically on -efficiency, even in the smallest details. The result is bigger, more -complicated, and often less correct programs, that take longer to -write than their more straightforward equivalents and that usually run -only marginally faster. - -(((branching recursion)))But recursion is not always just a -less-efficient alternative to looping. Some problems are much easier -to solve with recursion than with loops. Most often these are problems -that require exploring or processing several “branches”, each of which -might branch out again into more branches. - -[[recursive_puzzle]] - -(((recursion)))(((number puzzle example)))Consider this puzzle: by -starting from the number 1 and repeatedly either adding 5 or -multiplying by 3, an infinite amount of new numbers can be produced. 
-How would you write a function that, given a number, tries to find a -sequence of such additions and multiplications that produce that -number? For example, the number 13 could be reached by first -multiplying by 3 and then adding 5 twice, whereas the number 15 cannot -be reached at all. - -Here is a recursive solution: - -[source,javascript] ----- -function findSolution(target) { - function find(current, history) { - if (current == target) - return history; - else if (current > target) - return null; - else - return find(current + 5, "(" + history + " + 5)") || - find(current * 3, "(" + history + " * 3)"); - } - return find(1, "1"); -} - -console.log(findSolution(24)); -// → (((1 * 3) + 5) * 3) ----- - -Note that this program doesn't necessarily find the _shortest_ -sequence of operations. It is satisfied when it finds any sequence at -all. - -I don't necessarily expect you to see how it works right away. But -let's work through it, since it makes for a great exercise in -recursive thinking. - -The inner function `find` does the actual recursing. It takes two -((argument))s—the current number and a string that records how we -reached this number—and returns either a string that shows how to get -to the target or `null`. - -(((null)))(((|| operator)))(((short-circuit evaluation)))To do this, the -function performs one of three actions. If the current number is the -target number, the current history is a way to reach that target, so -it is simply returned. If the current number is greater than the -target, there's no sense in further exploring this history since both -adding and multiplying will only make the number bigger. And finally, -if we're still below the target, the function tries both possible -paths that start from the current number, by calling itself twice, -once for each of the allowed next steps. If the first call returns -something that is not `null`, it is returned. 
Otherwise, the second -call is returned—regardless of whether it produces a string or `null`. - -(((call stack)))To better understand how this function produces the -effect we're looking for, let's look at all the calls to `find` that -are made when searching for a solution for the number 13. - ----- -find(1, "1") - find(6, "(1 + 5)") - find(11, "((1 + 5) + 5)") - find(16, "(((1 + 5) + 5) + 5)") - too big - find(33, "(((1 + 5) + 5) * 3)") - too big - find(18, "((1 + 5) * 3)") - too big - find(3, "(1 * 3)") - find(8, "((1 * 3) + 5)") - find(13, "(((1 * 3) + 5) + 5)") - found! ----- - -The indentation suggests the depth of the call stack. The first time -`find` is called it calls itself twice to explore the solutions that start with -`(1 + 5)` and `(1 * 3)`. The first call tries to find a solution that -starts with `(1 + 5)` and, using recursion, explores _every_ solution -that yields a number less than or equal to the target number. Since -it doesn't find a solution that hits the target, it returns `null` -back to the first call. There the `||` operator causes the call that -explores `(1 * 3)` to happen. This search has more luck because its -first recursive call, through yet _another_ recursive call, hits upon -the target number, 13. This innermost recursive call returns a string, -and each of the `||` operators in the intermediate calls pass that -string along, ultimately returning our solution. - -== Growing functions == - -(((function,definition)))There are two more or less natural ways for -functions to be introduced into programs. - -(((repetition)))The first is that you find yourself writing very -similar code multiple times. We want to avoid doing that since having -more code means more space for mistakes to hide and more material to -read for people trying to understand the program. So we take the -repeated functionality, find a good name for it, and put it into a -function. 
- -The second way is that you find you need some functionality that you -haven't written yet and that sounds like it deserves its own function. -You'll start by naming the function, and you'll then write its body. -You might even start writing code that uses the function before you -actually define the function itself. - -(((function,naming)))(((variable,naming)))How difficult it is to find -a good name for a function is a good indication of how clear a concept -it is that you're trying to wrap. Let's go through an example. - -(((farm example)))We want to write a program that prints two numbers, -the numbers of cows and chickens on a farm, with the words `Cows` and -`Chickens` after them, and zeros padded before both numbers so that -they are always three digits long. - ----- -007 Cows -011 Chickens ----- - -That clearly asks for a function of two arguments. Let's get coding. - -[source,javascript] ----- -function printFarmInventory(cows, chickens) { - var cowString = String(cows); - while (cowString.length < 3) - cowString = "0" + cowString; - console.log(cowString + " Cows"); - var chickenString = String(chickens); - while (chickenString.length < 3) - chickenString = "0" + chickenString; - console.log(chickenString + " Chickens"); -} -printFarmInventory(7, 11); ----- - -(((length property,for string)))(((while loop)))Adding `.length` -after a string value will give us the length of that string. Thus, the -`while` loops keep adding zeros in front of the number strings until -they are at least three characters long. - -Mission accomplished! But just as we are about to send the farmer the -code (along with a hefty invoice, of course), he calls and tells us -he's also started keeping pigs, and couldn't we please extend the -software to also print pigs? - -(((copy-paste programming)))We sure can. But just as we're in the -process of copying and pasting those four lines one more time, we stop -and reconsider. There has to be a better way. 
Here's a first attempt: - -[source,javascript] ----- -function printZeroPaddedWithLabel(number, label) { - var numberString = String(number); - while (numberString.length < 3) - numberString = "0" + numberString; - console.log(numberString + " " + label); -} - -function printFarmInventory(cows, chickens, pigs) { - printZeroPaddedWithLabel(cows, "Cows"); - printZeroPaddedWithLabel(chickens, "Chickens"); - printZeroPaddedWithLabel(pigs, "Pigs"); -} - -printFarmInventory(7, 11, 3); ----- - -(((function,naming)))It works! But that name, -`printZeroPaddedWithLabel`, is a little awkward. It conflates three -things—printing, zero-padding, and adding a label—into a single -function. - -(((zeroPad function)))Instead of lifting out the repeated part of our -program wholesale, let's try to pick out a single _concept_. - -[source,javascript] ----- -function zeroPad(number, width) { - var string = String(number); - while (string.length < width) - string = "0" + string; - return string; -} - -function printFarmInventory(cows, chickens, pigs) { - console.log(zeroPad(cows, 3) + " Cows"); - console.log(zeroPad(chickens, 3) + " Chickens"); - console.log(zeroPad(pigs, 3) + " Pigs"); -} - -printFarmInventory(7, 16, 3); ----- - -(((readability)))(((pure function)))A function with a nice, obvious -name like `zeroPad` makes it easier for someone who reads the code to -figure out what it does. And it is useful in more situations than just -this specific program. For example, you could use it to help print -nicely aligned tables of numbers. - -(((interface,design)))How smart and versatile should our function be? -We could write anything from a terribly simple function that simply -pads a number so that it's three characters wide to a complicated -generalized number-formatting system that handles fractional numbers, -negative numbers, alignment of dots, padding with different -characters, and so on. 
- -A useful principle is not to add cleverness unless you are absolutely -sure you're going to need it. It can be tempting to write general -“((framework))s” for every little bit of functionality you come -across. Resist that urge. You won't get any real work done, and you'll -end up writing a lot of code that no one will ever use. - -[[pure]] -== Functions and side effects == - -(((side effect)))(((pure function)))(((function,purity)))Functions can -be roughly divided into those that are called for their side effects -and those that are called for their return value. (Though it is -definitely also possible to have both side effects and return a -value.) - -(((reuse)))The first helper function in the ((farm example)), -`printZeroPaddedWithLabel`, is called for its side effect: it prints a -line. The second version, `zeroPad`, is called for its return value. -It is no coincidence that the second is useful in more situations than -the first. Functions that create values are easier to combine in new -ways than functions that directly perform side effects. - -(((substitution)))A _pure_ function is a specific kind of -value-producing function that not only has no side effects but also -doesn't rely on side effects from other code—for example, it doesn't -read global variables that are occasionally changed by other code. A -pure function has the pleasant property that, when called with the -same arguments, it always produces the same value (and doesn't do -anything else). This makes it easy to reason about. A call to such a -function can be mentally substituted by its result, without changing -the meaning of the code. When you are not sure that a pure function is -working correctly, you can test it by simply calling it, and know that -if it works in that context, it will work in any context. Nonpure -functions might return different values based on all kinds of factors -and have side effects that might be hard to test and think about. 
- -(((optimization)))(((console.log)))Still, there's no need to feel bad -when writing functions that are not pure or to wage a holy war to -purge them from your code. Side effects are often useful. There'd be -no way to write a pure version of `console.log`, for example, and -`console.log` is certainly useful. Some operations are also easier to -express in an efficient way when we use side effects, so computing -speed can be a reason to avoid purity. - -== Summary == - -This chapter taught you how to write your own functions. The -`function` keyword, when used as an expression, can create a function -value. When used as a statement, it can be used to declare a variable -and give it a function as its value. - -[source,javascript] ----- -// Create a function value f -var f = function(a) { - console.log(a + 2); -}; - -// Declare g to be a function -function g(a, b) { - return a * b * 3.5; -} ----- - -A key aspect in understanding functions is understanding local scopes. -Parameters and variables declared inside a function are local to the -function, re-created every time the function is called, and not visible -from the outside. Functions declared inside another function have -access to the outer function's local scope. - -Separating the tasks your program performs into different -functions is helpful. You won't have to repeat yourself as much, and -functions can make a program more readable by grouping code into -conceptual chunks, in the same way that chapters and sections help -organize regular text. - -== Exercises == - -=== Minimum === - -(((Math object)))(((minimum (exercise))))(((Math.min -function)))(((minimum)))The -link:02_program_structure.html#return_values[previous chapter] -introduced the standard function `Math.min` that returns its smallest -argument. We can do that ourselves now. Write a function `min` that -takes two arguments and returns their minimum. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. 
- -console.log(min(0, 10)); -// → 0 -console.log(min(0, -10)); -// → -10 ----- -endif::interactive_target[] - -!!hint!! - -(((minimum (exercise))))If you have trouble putting braces and -parentheses in the right place to get a valid function definition, -start by copying one of the examples in this chapter and modifying it. - -(((return keyword)))A function may contain multiple `return` -statements. - -!!hint!! - -=== Recursion === - -(((recursion)))(((isEven (exercise))))(((even number)))We've seen -that `%` (the remainder operator) can be used to test whether a number -is even or odd by using `% 2` to check whether it's divisible by two. -Here's another way to define whether a positive whole number is even -or odd: - -- Zero is even. - -- One is odd. - -- For any other number _N_, its evenness is the same as _N_ - 2. - -Define a recursive function `isEven` corresponding to this -description. The function should accept a `number` parameter and -return a Boolean. - -(((stack overflow)))Test it on 50 and 75. See how it behaves on -1. -Why? Can you think of a way to fix this? - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -console.log(isEven(50)); -// → true -console.log(isEven(75)); -// → false -console.log(isEven(-1)); -// → ?? ----- -endif::interactive_target[] - -!!hint!! - -(((isEven (exercise))))(((if keyword,chaining)))(((recursion)))Your -function will likely look somewhat similar to the inner `find` -function in the recursive `findSolution` -link:03_functions.html#recursive_puzzle[example] in this chapter, with -an `if`/`else if`/`else` chain that tests which of the three cases -applies. The final `else`, corresponding to the third case, makes the -recursive call. Each of the branches should contain a `return` -statement or in some other way arrange for a specific value to be -returned. 
- -(((stack overflow)))When given a negative number, the function will -recurse again and again, passing itself an ever more negative number, -thus getting further and further away from returning a result. It will -eventually run out of stack space and abort. - -!!hint!! - -=== Bean counting === - -(((bean counting (exercise))))(((charAt -method)))(((string,indexing)))(((zero-based counting)))You can get the -Nth character, or letter, from a string by writing -`"string".charAt(N)`, similar to how you get its length with -`"s".length`. The returned value will be a string containing only one -character (for example, `"b"`). The first character has position zero, -which causes the last one to be found at position `string.length - 1`. -In other words, a two-character string has length 2, and its -characters have positions 0 and 1. - -Write a function `countBs` that takes a string as its only argument -and returns a number that indicates how many uppercase “B” characters -are in the string. - -Next, write a function called `countChar` that behaves like `countBs`, -except it takes a second argument that indicates the character that is -to be counted (rather than counting only uppercase “B” characters). -Rewrite `countBs` to make use of this new function. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -console.log(countBs("BBC")); -// → 2 -console.log(countChar("kakkerlak", "k")); -// → 4 ----- -endif::interactive_target[] - -!!hint!! - -(((bean counting (exercise))))(((length property,for -string)))(((counter variable)))A ((loop)) in your function will have -to look at every character in the string by running an index from zero -to one below its length (`< string.length`). If the character at the -current position is the same as the one the function is looking for, -it adds 1 to a counter variable. Once the loop has finished, the -counter can be returned. 
- -(((local variable)))Take care to make all the variables used in the -function _local_ to the function by using the `var` keyword. - -!!hint!! diff --git a/04_data.txt b/04_data.txt deleted file mode 100644 index fd9e77b00..000000000 --- a/04_data.txt +++ /dev/null @@ -1,1464 +0,0 @@ -:chap_num: 4 -:prev_link: 03_functions -:next_link: 05_higher_order -:load_files: ["code/jacques_journal.js", "code/chapter/04_data.js"] -:zip: node/html - -= Data Structures: Objects and Arrays = - -[chapterquote="true"] -[quote, Charles Babbage, Passages from the Life of a Philosopher (1864)] -____ -On two occasions I have been asked, ‘Pray, -Mr. Babbage, if you put into the machine wrong figures, will the right -answers come out?’ [...] I am not able rightly to apprehend the kind -of confusion of ideas that could provoke such a question. -____ - -(((Babbage+++,+++ Charles)))(((object)))(((data structure)))Numbers, Booleans, and strings are the -bricks that ((data)) structures are built from. But you can't make -much of a house out of a single brick. _Objects_ allow us to group -values—including other objects—together and thus build more complex -structures. - -The programs we have built so far have been seriously hampered by the -fact that they were operating only on simple data types. This chapter -will add a basic understanding of data structures to your toolkit. By -the end of it, you'll know enough to start writing some useful -programs. - -The chapter will work through a more or less realistic programming -example, introducing concepts as they apply to the problem at hand. -The example code will often build on functions and variables that were -introduced earlier in the text. - -ifdef::book_target[] - -(((sandbox)))The online coding sandbox for the book -(http://eloquentjavascript.net/code[_eloquentjavascript.net/code_]) -provides a way to run code in the context of a specific chapter. 
If -you decide to work through the examples in another environment, be -sure to first download the full code for this chapter from the -sandbox page. - -endif::book_target[] - -== The weresquirrel == - -(((weresquirrel example)))(((lycanthropy)))Every now and then, usually -between eight and ten in the evening, ((Jacques)) finds himself -transforming into a small furry rodent with a bushy tail. - -On one hand, Jacques is quite glad that he doesn't have classic -lycanthropy. Turning into a squirrel tends to cause fewer problems -than turning into a wolf. Instead of having to worry about -accidentally eating the neighbor (_that_ would be awkward), he worries -about being eaten by the neighbor's cat. After two occasions where he -woke up on a precariously thin branch in the crown of an oak, naked -and disoriented, he has taken to locking the doors and windows of his -room at night and putting a few walnuts on the floor to keep himself -busy. - -image::img/weresquirrel.png[alt="The weresquirrel"] - -That takes care of the cat and oak problems. But Jacques still suffers -from his condition. The irregular occurrences of the transformation -make him suspect that they might be triggered by something. -For a while, he believed that it happened only on days when he -had touched trees. So he stopped touching trees entirely and even -avoided going near them. But the problem persisted. - -(((journal)))Switching to a more scientific approach, Jacques intends -to start keeping a daily log of everything he did that day and whether -he changed form. With this data he hopes to narrow down the conditions -that trigger the transformations. - -The first thing he does is design a data structure to store this -information. - -== Data sets == - -(((data structure)))To work with a chunk of digital data, we'll first -have to find a way to represent it in our machine's ((memory)). Say, -as a simple example, that we want to represent a ((collection)) of -numbers: 2, 3, 5, 7, and 11. 
- -(((string)))We could get creative with strings—after all, strings -can be any length, so we can put a lot of data into them—and use `"2 3 -5 7 11"` as our representation. But this is awkward. You'd have to -somehow extract the digits and convert them back to numbers to access -them. - -(((array,creation)))((([] (array))))Fortunately, JavaScript -provides a data type specifically for storing sequences of values. It -is called an _array_ and is written as a list of values between -((square brackets)), separated by commas. - -[source,javascript] ----- -var listOfNumbers = [2, 3, 5, 7, 11]; -console.log(listOfNumbers[2]); -// → 5 -console.log(listOfNumbers[2 - 1]); -// → 3 ----- - -((([] (subscript))))(((array,indexing)))The notation for getting -at the elements inside an array also uses ((square brackets)). A pair -of square brackets immediately after an expression, with another -expression inside of them, will look up the element in the left-hand -expression that corresponds to the _((index))_ given by the expression -in the brackets. - -[[array_indexing]] -The first index of an array is zero, not one. So the first element can -be read with `listOfNumbers[0]`. If you don't have a programming -background, this convention might take some getting used to. But -((zero-based counting)) has a long tradition in technology, and as -long as this convention is followed consistently (which it is, in -JavaScript), it works well. - -[[properties]] -== Properties == - -(((Math object)))(((Math.max function)))(((length property,for -string)))(((object,property)))(((period character)))We've seen a few -suspicious-looking expressions like `myString.length` (to get the -length of a string) and `Math.max` (the maximum function) in past -examples. These are expressions that access a _((property))_ of some -value. In the first case, we access the `length` property of the value -in `myString`. 
In the second, we access the property named `max` in -the `Math` object (which is a collection of mathematics-related values -and functions). - -(((property)))(((null)))(((undefined)))Almost all JavaScript values -have properties. The exceptions are `null` and `undefined`. If you try -to access a property on one of these nonvalues, you get an error. - -// test: no - -[source,javascript] ----- -null.length; -// → TypeError: Cannot read property 'length' of null ----- - -indexsee:[dot character,period character] -((([] (subscript))))(((period character)))(((square -brackets)))(((computed property)))The two most common ways to access -properties in JavaScript are with a dot and with square brackets. Both -`value.x` and `value[x]` access a ((property)) on ++value++—but not -necessarily the same property. The difference is in how `x` is -interpreted. When using a dot, the part after the dot must be a valid -variable name, and it directly names the property. When using square -brackets, the expression between the brackets is _evaluated_ to get -the property name. Whereas `value.x` fetches the property of `value` -named “x”, `value[x]` tries to evaluate the expression `x` and uses -the result as the property name. - -So if you know that the property you are interested in is called -“length”, you say `value.length`. If you want to extract the property -named by the value held in the variable `i`, you say `value[i]`. And -because property names can be any string, if you want to access a -property named “2” or “John Doe”, you must use square brackets: -`value[2]` or `value["John Doe"]`. This is the case even though you -know the precise name of the property in advance, because neither “2” -nor “John Doe” is a valid variable name and so cannot be accessed -through dot notation. - -(((array)))(((length property,for array)))(((array,length -of)))The elements in an array are stored in properties. 
Because the -names of these properties are numbers and we often need to get their -name from a variable, we have to use the bracket syntax to access -them. The `length` property of an array tells us how many elements it -contains. This property name is a valid variable name, and we know its -name in advance, so to find the length of an array, you typically -write `array.length` because that is easier to write than -`array["length"]`. - -[[methods]] -== Methods == - -(((function,as property)))(((method)))(((string)))Both string and -array objects contain, in addition to the `length` property, a number -of properties that refer to function values. - -[source,javascript] ----- -var doh = "Doh"; -console.log(typeof doh.toUpperCase); -// → function -console.log(doh.toUpperCase()); -// → DOH ----- - -(((case conversion)))(((toUpperCase method)))(((toLowerCase -method)))Every string has a `toUpperCase` property. When called, it -will return a copy of the string, in which all letters have been -converted to uppercase. There is also `toLowerCase`. You can guess -what that does. - -(((this)))Interestingly, even though the call to `toUpperCase` does -not pass any arguments, the function somehow has access to the string -`"Doh"`, the value whose property we called. How this works is -described in link:06_object.html#obj_methods[Chapter 6]. - -Properties that contain functions are generally called _methods_ of -the value they belong to. As in, “++toUpperCase++ is a method of a -string”. 
- -[[array_methods]] -(((collection)))(((array)))(((string)))(((push -method)))(((pop method)))(((join method)))This example demonstrates -some methods that array objects have: - -[source,javascript] ----- -var mack = []; -mack.push("Mack"); -mack.push("the", "Knife"); -console.log(mack); -// → ["Mack", "the", "Knife"] -console.log(mack.join(" ")); -// → Mack the Knife -console.log(mack.pop()); -// → Knife -console.log(mack); -// → ["Mack", "the"] ----- - -The `push` method can be used to add values to the end of an array. -The `pop` method does the opposite: it removes the value at the end of -the array and returns it. An array of strings can be flattened to a -single string with the `join` method. The argument given to `join` -determines the text that is glued between the array's elements. - -== Objects == - -(((journal)))(((weresquirrel example)))(((array)))(((record)))Back to the weresquirrel. A set of daily log -entries can be represented as an array. But the entries do not consist -of just a number or a string—each entry needs to store a list of -activities and a Boolean value that indicates whether Jacques turned -into a squirrel. Ideally, we would like to group these values together -into a single value and then put these grouped values into an array of -log entries. - -(((syntax)))(((object)))(((property)))(((curly braces)))((({} -(object))))Values of the type _object_ are arbitrary collections of -properties, and we can add or remove these properties as we please. -One way to create an object is by using a curly brace notation. - -[source,javascript] ----- -var day1 = { - squirrel: false, - events: ["work", "touched tree", "pizza", "running", - "television"] -}; -console.log(day1.squirrel); -// → false -console.log(day1.wolf); -// → undefined -day1.wolf = false; -console.log(day1.wolf); -// → false ----- - -(((quoting,of object properties)))(((colon character)))Inside the -curly braces, we can give a list of properties separated by commas. 
-Each property is written as a name, followed by a colon, followed by -an expression that provides a value for the property. Spaces and line -breaks are not significant. When an object spans multiple lines, -indenting it like in the previous example improves readability. -Properties whose names are not valid variable names or valid numbers -have to be quoted. - -[source,javascript] ----- -var descriptions = { - work: "Went to work", - "touched tree": "Touched a tree" -}; ----- - -This means that ((curly braces)) have _two_ meanings in JavaScript. At -the start of a statement, they start a block of statements. In any -other position, they describe an object. Fortunately, it is almost -never useful to start a statement with a curly-brace object, and in -typical programs, there is no ambiguity between these two uses. - -(((undefined)))Reading a property that doesn't exist will produce the -value `undefined`, which happens the first time we try to read the `wolf` -property in the previous example. - -(((property,assignment)))(((mutability)))(((= operator)))It is -possible to assign a value to a property expression with the `=` -operator. This will replace the property's value if it already existed -or create a new property on the object if it didn't. - -(((tentacle (analogy))))(((property,model of)))To briefly return to -our tentacle model of ((variable)) bindings—property bindings are -similar. They _grasp_ values, but other variables and properties might -be holding onto those same values. You may think of objects as -octopuses with any number of tentacles, each of which has a name -inscribed on it. - -image::img/octopus-object.jpg[alt="Artist's representation of an object"] - -(((delete operator)))(((property,deletion)))The `delete` operator cuts -off a tentacle from such an octopus. It is a unary operator that, when -applied to a property access expression, will remove the named -property from the object. This is not a common thing to do, but it is -possible. 
- -[source,javascript] ----- -var anObject = {left: 1, right: 2}; -console.log(anObject.left); -// → 1 -delete anObject.left; -console.log(anObject.left); -// → undefined -console.log("left" in anObject); -// → false -console.log("right" in anObject); -// → true ----- - -(((in operator)))(((property,testing for)))(((object)))The binary -`in` operator, when applied to a string and an object, returns a -Boolean value that indicates whether that object has that property. -The difference between setting a property to `undefined` and actually -deleting it is that, in the first case, the object still _has_ the -property (it just doesn't have a very interesting value), whereas in -the second case the property is no longer present and `in` will return -`false`. - -(((array)))(((collection)))Arrays, then, are just a kind of -object specialized for storing sequences of things. If you evaluate -`typeof [1, 2]`, this produces `"object"`. You can see them as long, -flat octopuses with all their arms in a neat row, labeled with -numbers. - -image::img/octopus-array.jpg[alt="Artist's representation of an array"] - -(((journal)))(((weresquirrel example)))So we can represent Jacques’ -journal as an array of objects. - -[source,javascript] ----- -var journal = [ - {events: ["work", "touched tree", "pizza", - "running", "television"], - squirrel: false}, - {events: ["work", "ice cream", "cauliflower", - "lasagna", "touched tree", "brushed teeth"], - squirrel: false}, - {events: ["weekend", "cycling", "break", - "peanuts", "beer"], - squirrel: true}, - /* and so on... */ -]; ----- - -== Mutability == - -We will get to actual programming _real_ soon now. But first, there's -one last piece of theory to understand. - -(((mutability)))(((side effect)))(((number)))(((string)))(((Boolean)))(((object)))We've seen that object -values can be modified. 
The types of values discussed in earlier -chapters, such as numbers, strings, and Booleans, are all -__immutable__—it is impossible to change an existing value of those -types. You can combine them and derive new values from them, but when -you take a specific string value, that value will always remain the -same. The text inside it cannot be changed. If you have a reference to a -string that contains `"cat"`, it is not possible for other code to -change a character in _that_ string to make it spell `"rat"`. - -With objects, on the other hand, the content of a value _can_ be -modified by changing its properties. - -(((object,identity)))(((identity)))(((memory)))When we have two -numbers, 120 and 120, we can consider them precisely the same number, -whether or not they refer to the same physical bits. But with objects, -there is a difference between having two references to the same object -and having two different objects that contain the same properties. -Consider the following code: - -[source,javascript] ----- -var object1 = {value: 10}; -var object2 = object1; -var object3 = {value: 10}; - -console.log(object1 == object2); -// → true -console.log(object1 == object3); -// → false - -object1.value = 15; -console.log(object2.value); -// → 15 -console.log(object3.value); -// → 10 ----- - -(((tentacle (analogy))))(((variable,model of)))The `object1` and -`object2` variables grasp the _same_ object, which is why changing -`object1` also changes the value of `object2`. The variable `object3` -points to a different object, which initially contains the same -properties as `object1` but lives a separate life. - -(((== operator)))(((comparison,of objects)))(((deep -comparison)))JavaScript's `==` operator, when comparing objects, will -return `true` only if both objects are precisely the same value. -Comparing different objects will return `false`, even if they have -identical contents.
There is no “deep” comparison operation built into -JavaScript that looks at objects' contents, but it is possible to -write it yourself (which will be one of the -link:04_data.html#exercise_deep_compare[exercises] at the end of this -chapter). - -== The lycanthrope's log == - -(((weresquirrel example)))(((lycanthropy)))(((addEntry function)))So -Jacques starts up his JavaScript interpreter and sets up the -environment he needs to keep his ((journal)). - -// include_code - -[source,javascript] ----- -var journal = []; - -function addEntry(events, didITurnIntoASquirrel) { - journal.push({ - events: events, - squirrel: didITurnIntoASquirrel - }); -} ----- - -And then, every evening at ten—or sometimes the next morning, after -climbing down from the top shelf of his bookcase—he records the day. - -[source,javascript] ----- -addEntry(["work", "touched tree", "pizza", "running", - "television"], false); -addEntry(["work", "ice cream", "cauliflower", "lasagna", - "touched tree", "brushed teeth"], false); -addEntry(["weekend", "cycling", "break", "peanuts", - "beer"], true); ----- - -Once he has enough data points, he intends to compute the -((correlation)) between his squirrelification and each of the day's -events and ideally learn something useful from those correlations. - -(((correlation)))_Correlation_ is a measure of ((dependence)) between -((variable))s (“variables” in the statistical sense, not the -JavaScript sense). It is usually expressed as a coefficient that -ranges from -1 to 1. Zero correlation means the variables are not -related, whereas a correlation of one indicates that the two are -perfectly related—if you know one, you also know the other. Negative -one also means that the variables are perfectly related but that they -are opposites—when one is true, the other is false. - -(((phi coefficient)))For binary (Boolean) variables, the _phi_ -coefficient (_ϕ_) provides a good measure of correlation and is -relatively easy to compute.
To compute _ϕ_, we need a ((table)) _n_ -that contains the number of times the various combinations of the two -variables were observed. For example, we could take the event of -eating ((pizza)) and put that in a table like this: - -image::img/pizza-squirrel.svg[alt="Eating pizza versus turning into a squirrel",width="7cm"] - -_ϕ_ can be computed using the following formula, where _n_ refers to the table: - -ifdef::html_target[] - -++++ -
    \[ \varphi = \frac{n_{11}n_{00} - n_{10}n_{01}}{\sqrt{n_{1\bullet}n_{0\bullet}n_{\bullet1}n_{\bullet0}}} \]
    -++++ - -endif::html_target[] - -ifdef::tex_target[] - -pass:[\begin{equation}\varphi = \frac{n_{11}n_{00}-n_{10}n_{01}}{\sqrt{n_{1\bullet}n_{0\bullet}n_{\bullet1}n_{\bullet0}}}\end{equation}] - -endif::tex_target[] - -The notation (!html _n_~01~!)(!tex pass:[$n_{01}$]!) indicates the -number of measurements where the first variable (squirrelness) is false -(0) and the second variable (pizza) is true (1). In this -example, (!html _n_~01~!)(!tex pass:[$n_{01}$]!) is 9. - -The value (!html _n_~1•~!)(!tex pass:[$n_{1\bullet}$]!) refers to the -sum of all measurements where the first variable is true, which is 5 -in the example table. Likewise, (!html _n_~•0~!)(!tex pass:[$n_{\bullet0}$]!) -refers to the sum of the measurements where the second variable is false. - -(((correlation)))(((phi coefficient)))So for the pizza table, the part -above the division line (the dividend) would be 1×76 - 4×9 = 40, and -the part below it (the divisor) would be the square root of -5×85×10×80, or (!html √340000!)(!tex pass:[$\sqrt{340000}$]!). This -comes out to _ϕ_ ≈ 0.069, which is tiny. Eating ((pizza)) does not -appear to have influence on the transformations. - -== Computing correlation == - -(((array,as table)))(((nesting,of arrays)))We can represent a -two-by-two ((table)) in JavaScript with a four-element array (`[76, 9, -4, 1]`). We could also use other representations, such as an array -containing two two-element arrays (`[[76, 9], [4, 1]]`) or an object -with property names like `"11"` and `"01"`, but the flat array is -simple and makes the expressions that access the table pleasantly -short. We'll interpret the indices to the array as two-((bit)) -((binary number)), where the leftmost (most significant) digit refers -to the squirrel variable and the rightmost (least significant) digit -refers to the event variable. For example, the binary number `10` -refers to the case where Jacques did turn into a squirrel, but the -event (say, "pizza") didn't occur. 
This happened four times. And since -binary `10` is 2 in decimal notation, we will store this number at -index 2 of the array. - -(((phi coefficient)))(((phi function)))This is the function that -computes the _ϕ_ coefficient from such an array: - -// test: clip -// include_code strip_log - -[source,javascript] ----- -function phi(table) { - return (table[3] * table[0] - table[2] * table[1]) / - Math.sqrt((table[2] + table[3]) * - (table[0] + table[1]) * - (table[1] + table[3]) * - (table[0] + table[2])); -} - -console.log(phi([76, 9, 4, 1])); -// → 0.068599434 ----- - -(((square root)))(((Math.sqrt function)))This is simply a direct -translation of the _ϕ_ formula into JavaScript. `Math.sqrt` is the -square root function, as provided by the `Math` object in a standard -JavaScript environment. We have to sum two fields from the table to -get fields like (!html n~1•~!)(!tex pass:[$n_{1\bullet}$]!) because -the sums of rows or columns are not stored directly in our data -structure. - -(((JOURNAL data set)))Jacques kept his journal for three months. The -resulting ((data set)) is available in the coding sandbox for this -chapter(!book (http://eloquentjavascript.net/code#4[_eloquentjavascript.net/code#4_])!), -where it is stored in the `JOURNAL` variable, and in a downloadable -http://eloquentjavascript.net/code/jacques_journal.js[file]. - -(((tableFor function)))(((hasEvent function)))To extract a two-by-two -((table)) for a specific event from this journal, we must loop over -all the entries and tally up how many times the event occurs in -relation to squirrel transformations. 
- -// include_code strip_log - -[source,javascript] ----- -function hasEvent(event, entry) { - return entry.events.indexOf(event) != -1; -} - -function tableFor(event, journal) { - var table = [0, 0, 0, 0]; - for (var i = 0; i < journal.length; i++) { - var entry = journal[i], index = 0; - if (hasEvent(event, entry)) index += 1; - if (entry.squirrel) index += 2; - table[index] += 1; - } - return table; -} - -console.log(tableFor("pizza", JOURNAL)); -// → [76, 9, 4, 1] ----- - -(((array,searching)))(((indexOf method)))The `hasEvent` function tests -whether an entry contains a given event. Arrays have an `indexOf` -method that tries to find a given value (in this case, the event name) -in the array and returns the index at which it was found or -1 if it -wasn't found. So if the call to `indexOf` doesn't return -1, then we -know the event was found in the entry. - -(((array,indexing)))The body of the loop in `tableFor` figures -out which box in the table each journal entry falls into by checking -whether the entry contains the specific event it's interested in and -whether the event happens alongside a squirrel incident. The loop then -adds one to the number in the array that corresponds to this box on -the table. - -We now have the tools we need to compute individual ((correlation))s. -The only step remaining is to find a correlation for every type of -event that was recorded and see whether anything stands out. But how -should we store these correlations once we compute them? - -== Objects as maps == - -(((weresquirrel example)))(((array)))One possible way is to store -all the ((correlation))s in an array, using objects with `name` and -`value` properties. But that makes looking up the correlation for a -given event somewhat cumbersome: you'd have to loop over the whole -array to find the object with the right `name`. We could wrap this -lookup process in a function, but we would still be writing more code, -and the computer would be doing more work than necessary. 
- -[[object_map]] -(((object)))(((square brackets)))(((object,as map)))(((in -operator)))A better way is to use object properties named after the -event types. We can use the square bracket access notation to create -and read the properties and can use the `in` operator to test whether -a given property exists. - -[source,javascript] ----- -var map = {}; -function storePhi(event, phi) { - map[event] = phi; -} - -storePhi("pizza", 0.069); -storePhi("touched tree", -0.081); -console.log("pizza" in map); -// → true -console.log(map["touched tree"]); -// → -0.081 ----- - -(((data structure)))A _((map))_ is a way to go from values in one -domain (in this case, event names) to corresponding values in another -domain (in this case, _ϕ_ coefficients). - -There are a few potential problems with using objects like this, which -we will discuss in link:06_object.html#prototypes[Chapter 6], but for -the time being, we won't worry about those. - -(((for/in loop)))(((for loop)))(((object,looping over)))What if -we want to find all the events for which we have stored a coefficient? -The properties don't form a predictable series, like they would in an -array, so we cannot use a normal `for` loop. JavaScript provides a -loop construct specifically for going over the properties of an -object. It looks a little like a normal `for` loop but distinguishes -itself by the use of the word `in`. - -[source,javascript] ----- -for (var event in map) - console.log("The correlation for '" + event + - "' is " + map[event]); -// → The correlation for 'pizza' is 0.069 -// → The correlation for 'touched tree' is -0.081 ----- - -[[analysis]] -== The final analysis == - -(((journal)))(((weresquirrel example)))(((gatherCorrelations -function)))To find all the types of events that are present in the -data set, we simply process each entry in turn and then loop over the -events in that entry. We keep an object `phis` that has correlation -coefficients for all the event types we have seen so far. 
Whenever we -run across a type that isn't in the `phis` object yet, we compute its -correlation and add it to the object. - -// test: clip -// include_code strip_log - -[source,javascript] ----- -function gatherCorrelations(journal) { - var phis = {}; - for (var entry = 0; entry < journal.length; entry++) { - var events = journal[entry].events; - for (var i = 0; i < events.length; i++) { - var event = events[i]; - if (!(event in phis)) - phis[event] = phi(tableFor(event, journal)); - } - } - return phis; -} - -var correlations = gatherCorrelations(JOURNAL); -console.log(correlations.pizza); -// → 0.068599434 ----- - -(((correlation)))Let's see what came out. - -// test: no - -[source,javascript] ----- -for (var event in correlations) - console.log(event + ": " + correlations[event]); -// → carrot: 0.0140970969 -// → exercise: 0.0685994341 -// → weekend: 0.1371988681 -// → bread: -0.0757554019 -// → pudding: -0.0648203724 -// and so on... ----- - -(((for/in loop)))Most correlations seem to lie close to zero. Eating -carrots, bread, or pudding apparently does not trigger -squirrel-lycanthropy. It _does_ seem to occur somewhat more often on -weekends, however. Let's filter the results to show only correlations -greater than 0.1 or less than -0.1. - -// start_code -// test: no - -[source,javascript] ----- -for (var event in correlations) { - var correlation = correlations[event]; - if (correlation > 0.1 || correlation < -0.1) - console.log(event + ": " + correlation); -} -// → weekend: 0.1371988681 -// → brushed teeth: -0.3805211953 -// → candy: 0.1296407447 -// → work: -0.1371988681 -// → spaghetti: 0.2425356250 -// → reading: 0.1106828054 -// → peanuts: 0.5902679812 ----- - -A-ha! There are two factors whose ((correlation)) is clearly stronger -than the others. Eating ((peanuts)) has a strong positive effect on -the chance of turning into a squirrel, whereas brushing his teeth has -a significant negative effect. - -Interesting. Let's try something. 
- -// include_code strip_log - -[source,javascript] ----- -for (var i = 0; i < JOURNAL.length; i++) { - var entry = JOURNAL[i]; - if (hasEvent("peanuts", entry) && - !hasEvent("brushed teeth", entry)) - entry.events.push("peanut teeth"); -} -console.log(phi(tableFor("peanut teeth", JOURNAL))); -// → 1 ----- - -Well, that's unmistakable! The phenomenon occurs precisely when -Jacques eats ((peanuts)) and fails to brush his teeth. If only he -weren't such a slob about dental hygiene, he'd have never even noticed -his affliction. - -Knowing this, Jacques simply stops eating peanuts altogether and finds -that this completely puts an end to his transformations. - -(((weresquirrel example)))All is well with Jacques for a while. But a -few years later, he loses his ((job)) and is eventually forced to take -employment with a ((circus)), where he performs as _The Incredible -Squirrelman_ by stuffing his mouth with peanut butter before every -show. One day, fed up with this pitiful existence, Jacques fails to -change back into his human form, hops through a crack in the circus -tent, and vanishes into the forest. He is never seen again. - -== Further arrayology == - -(((array,methods)))(((method)))Before finishing up this chapter, -I want to introduce you to a few more object-related concepts. We'll -start by introducing some generally useful array methods. - -(((push method)))(((pop method)))(((shift method)))(((unshift -method)))We saw `push` and `pop`, which add and remove elements at the -end of an array, link:04_data.html#array_methods[earlier] in this -chapter. The corresponding methods for adding and removing things at -the start of an array are called `unshift` and `shift`. 
- -[source,javascript] ----- -var todoList = []; -function rememberTo(task) { - todoList.push(task); -} -function whatIsNext() { - return todoList.shift(); -} -function urgentlyRememberTo(task) { - todoList.unshift(task); -} ----- - -(((task management example)))The previous program manages lists of -tasks. You add tasks to the end of the list by calling -`rememberTo("eat")`, and when you're ready to do something, you call -`whatIsNext()` to get (and remove) the front item from the list. The -`urgentlyRememberTo` function also adds a task but adds it to the -front instead of the back of the list. - -(((array,searching)))(((indexOf method)))(((lastIndexOf -method)))The `indexOf` method has a sibling called `lastIndexOf`, -which starts searching for the given element at the end of the array -instead of the front. - -[source,javascript] ----- -console.log([1, 2, 3, 2, 1].indexOf(2)); -// → 1 -console.log([1, 2, 3, 2, 1].lastIndexOf(2)); -// → 3 ----- - -Both `indexOf` and `lastIndexOf` take an optional second argument that -indicates where to start searching from. - -(((slice method)))(((array,indexing)))Another fundamental method -is `slice`, which takes a start index and an end index and returns an -array that has only the elements between those indices. The start -index is inclusive, the end index exclusive. - -[source,javascript] ----- -console.log([0, 1, 2, 3, 4].slice(2, 4)); -// → [2, 3] -console.log([0, 1, 2, 3, 4].slice(2)); -// → [2, 3, 4] ----- - -(((string,indexing)))When the end index is not given, `slice` -will take all of the elements after the start index. Strings also have -a `slice` method, which has a similar effect. - -(((concatenation)))(((concat method)))The `concat` method can be used -to glue arrays together, similar to what the `+` operator does for -strings. The following example shows both `concat` and `slice` in -action. 
It takes an array and an index, and it returns a new array -that is a copy of the original array with the element at the given -index removed. - -[source,javascript] ----- -function remove(array, index) { - return array.slice(0, index) - .concat(array.slice(index + 1)); -} -console.log(remove(["a", "b", "c", "d", "e"], 2)); -// → ["a", "b", "d", "e"] ----- - -== Strings and their properties == - -(((string,properties)))We can read properties like `length` and -`toUpperCase` from string values. But if you try to add a new -property, it doesn't stick. - -[source,javascript] ----- -var myString = "Fido"; -myString.myProperty = "value"; -console.log(myString.myProperty); -// → undefined ----- - -Values of type string, number, and Boolean are not objects, and though -the language doesn't complain if you try to set new properties on -them, it doesn't actually store those properties. The values are -immutable and cannot be changed. - -(((string,methods)))(((slice method)))(((indexOf -method)))(((string,searching)))But these types do have some built-in -properties. Every string value has a number of methods. The most -useful ones are probably `slice` and `indexOf`, which resemble the -array methods of the same name. - -[source,javascript] ----- -console.log("coconuts".slice(4, 7)); -// → nut -console.log("coconut".indexOf("u")); -// → 5 ----- - -One difference is that a string's `indexOf` can take a string -containing more than one character, whereas the corresponding array -method looks only for a single element. - -[source,javascript] ----- -console.log("one two three".indexOf("ee")); -// → 11 ----- - -(((whitespace)))(((trim method)))The `trim` method removes whitespace -(spaces, newlines, tabs, and similar characters) from the start and -end of a string. - -[source,javascript] ----- -console.log(" okay \n ".trim()); -// → okay ----- - -(((length property,for string)))(((charAt -method)))(((string,indexing)))We have already seen the string type's -`length` property. 
Accessing the individual characters in a string can -be done with the `charAt` method but also by simply reading numeric -properties, like you'd do for an array. - -[source,javascript] ----- -var string = "abc"; -console.log(string.length); -// → 3 -console.log(string.charAt(0)); -// → a -console.log(string[1]); -// → b ----- - -[[arguments_object]] -== The arguments object == - -(((arguments object)))(((length -property)))(((parameter)))(((optional argument)))(((array-like -object)))Whenever a function is called, a special variable named -`arguments` is added to the environment in which the function body -runs. This variable refers to an object that holds all of the -arguments passed to the function. Remember that in JavaScript you are -allowed to pass more (or fewer) arguments to a function than the -number of parameters the function itself declares. - -[source,javascript] ----- -function noArguments() {} -noArguments(1, 2, 3); // This is okay -function threeArguments(a, b, c) {} -threeArguments(); // And so is this ----- - -(((length property)))The `arguments` object has a `length` property -that tells us the number of arguments that were really passed to the -function. It also has a property for each argument, named 0, 1, 2, and -so on. - -indexsee:[pseudo array,array-like object] -(((array,methods)))If that sounds a lot like an array to you, -you're right, it _is_ a lot like an array. But this object, -unfortunately, does not have any array methods (like `slice` or -`indexOf`), so it is a little harder to use than a real array. - -[source,javascript] ----- -function argumentCounter() { - console.log("You gave me", arguments.length, "arguments."); -} -argumentCounter("Straw man", "Tautology", "Ad hominem"); -// → You gave me 3 arguments. ----- - -(((journal)))(((console.log)))(((variadic function)))Some functions -can take any number of arguments, like `console.log`. These typically -loop over the values in their `arguments` object. 
They can be used to -create very pleasant interfaces. For example, remember how we created -the entries to Jacques’ journal. - -[source,javascript] ----- -addEntry(["work", "touched tree", "pizza", "running", - "television"], false); ----- - -Since he is going to be calling this function a lot, we could create -an alternative that is easier to call. - -[source,javascript] ----- -function addEntry(squirrel) { - var entry = {events: [], squirrel: squirrel}; - for (var i = 1; i < arguments.length; i++) - entry.events.push(arguments[i]); - journal.push(entry); -} -addEntry(true, "work", "touched tree", "pizza", - "running", "television"); ----- - -(((arguments object,indexing)))This version reads its first argument -(`squirrel`) in the normal way and then goes over the rest of the -arguments (the loop starts at index 1, skipping the first) to gather -them into an array. - -== The Math object == - -(((Math object)))(((Math.min function)))(((Math.max -function)))(((Math.sqrt function)))(((minimum)))(((maximum)))(((square -root)))As we've seen, `Math` is a grab-bag of number-related utility -functions, such as `Math.max` (maximum), `Math.min` (minimum), and -`Math.sqrt` (square root). - -[[namespace_pollution]] -(((namespace)))(((namespace pollution)))(((object)))The -`Math` object is used simply as a container to group a bunch of -related functionality. There is only one `Math` object, and it is -almost never useful as a value. Rather, it provides a _namespace_ so -that all these functions and values do not have to be global -variables. - -(((variable,naming)))Having too many global variables “pollutes” the -namespace. The more names that have been taken, the more likely you -are to accidentally overwrite the value of some variable. For example, -it's not unlikely that you'll want to name something `max` in one of -your programs. Since JavaScript's built-in `max` function is tucked -safely inside the `Math` object, we don't have to worry about -overwriting it. 
- -Many languages will stop you, or at least warn you, when you are -defining a variable with a name that is already taken. JavaScript does -neither, so be careful. - -(((Math.cos function)))(((Math.sin function)))(((Math.tan -function)))(((Math.acos function)))(((Math.asin -function)))(((Math.atan function)))(((Math.PI -constant)))(((cosine)))(((sine)))(((tangent)))(((PI constant)))(((pi)))Back to -the `Math` object. If you need to do ((trigonometry)), `Math` can -help. It contains `cos` (cosine), `sin` (sine), and `tan` (tangent), -as well as their inverse functions, `acos`, `asin`, and `atan`, respectively. The -number π (pi)—or at least the closest approximation that fits in a -JavaScript number—is available as `Math.PI`. (There is an old -programming tradition of writing the names of ((constant)) values in -all caps.) - -// test: no - -[source,javascript] ----- -function randomPointOnCircle(radius) { - var angle = Math.random() * 2 * Math.PI; - return {x: radius * Math.cos(angle), - y: radius * Math.sin(angle)}; -} -console.log(randomPointOnCircle(2)); -// → {x: 0.3667, y: 1.966} ----- - -If sines and cosines are not something you are very familiar with, -don't worry. When they are used in this book, in -link:13_dom.html#sin_cos[Chapter 13], I'll explain them. - -(((Math.random function)))(((random number)))The previous example -uses `Math.random`. This is a function that returns a new -pseudorandom number between zero (inclusive) and one (exclusive) -every time you call it. - -// test: no - -[source,javascript] ----- -console.log(Math.random()); -// → 0.36993729369714856 -console.log(Math.random()); -// → 0.727367032552138 -console.log(Math.random()); -// → 0.40180766698904335 ----- - -(((pseudorandom number)))(((random number)))Though computers are -deterministic machines—they always react the same way if given the -same input—it is possible to have them produce numbers that appear -random. 
To do this, the machine keeps a number (or a bunch of numbers) -in its internal state. Then, every time a random number is requested, -it performs some complicated deterministic computations on this -internal state and returns part of the result of those computations. -The machine also uses the outcome to change its own internal state so -that the next “random” number produced will be different. - -(((rounding)))(((Math.floor function)))If we want a whole random -number instead of a fractional one, we can use `Math.floor` (which -rounds down to the nearest whole number) on the result of -`Math.random`. - -// test: no - -[source,javascript] ----- -console.log(Math.floor(Math.random() * 10)); -// → 2 ----- - -Multiplying the random number by 10 gives us a number greater than or -equal to zero, and below 10. Since `Math.floor` rounds down, this -expression will produce, with equal chance, any number from 0 through -9. - -(((Math.ceil function)))(((Math.round function)))There are also the -functions `Math.ceil` (for “ceiling”, which rounds up to a whole -number) and `Math.round` (to the nearest whole number). - -== The global object == - -(((global object)))(((window variable)))(((global -scope)))(((scope)))(((object)))The global scope, the space in which -global variables live, can also be approached as an object in -JavaScript. Each global variable is present as a ((property)) of this -object. In ((browser))s, the global scope object is stored in the -`window` variable. - -// test: no - -[source,javascript] ----- -var myVar = 10; -console.log("myVar" in window); -// → true -console.log(window.myVar); -// → 10 ----- - -== Summary == - -Objects and arrays (which are a specific kind of object) provide ways -to group several values into a single value. Conceptually, this allows -us to put a bunch of related things in a bag and run around with the -bag, instead of trying to wrap our arms around all of the individual -things and trying to hold on to them separately. 
- -Most values in JavaScript have properties, the exceptions being `null` -and `undefined`. Properties are accessed using `value.propName` or -`value["propName"]`. Objects tend to use names for their properties -and store more or less a fixed set of them. Arrays, on the other hand, -usually contain varying numbers of conceptually identical values and -use numbers (starting from 0) as the names of their properties. - -There _are_ some named properties in arrays, such as `length` and a -number of methods. Methods are functions that live in properties and -(usually) act on the value they are a property of. - -Objects can also serve as maps, associating values with names. The `in` -operator can be used to find out whether an object contains a property with -a given name. The same keyword can also be used in a `for` loop -(`for (var name in object)`) to loop over an object's properties. - -== Exercises == - -=== The sum of a range === - -(((summing (exercise))))The link:00_intro.html#intro[introduction] of this book alluded to the -following as a nice way to compute the sum of a range of numbers: - -// test: no - -[source,javascript] ----- -console.log(sum(range(1, 10))); ----- - -(((range function)))(((sum function)))Write a `range` function that -takes two arguments, `start` and `end`, and returns an array -containing all the numbers from `start` up to (and including) `end`. - -Next, write a `sum` function that takes an array of numbers and -returns the sum of these numbers. Run the previous program and see -whether it does indeed return 55. - -(((optional argument)))As a bonus assignment, modify your `range` -function to take an optional third argument that indicates the “step” -value used to build up the array. If no step is given, the array -elements go up by increments of one, corresponding to the old -behavior. The function call `range(1, 10, 2)` should return `[1, 3, 5, -7, 9]`. 
Make sure it also works with negative step values so that -`range(5, 2, -1)` produces `[5, 4, 3, 2]`. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -console.log(range(1, 10)); -// → [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] -console.log(range(5, 2, -1)); -// → [5, 4, 3, 2] -console.log(sum(range(1, 10))); -// → 55 ----- -endif::interactive_target[] - -!!hint!! - -(((summing (exercise))))(((array,creation)))(((square -brackets)))Building up an array is most easily done by first -initializing a variable to `[]` (a fresh, empty array) and repeatedly -calling its `push` method to add a value. Don't forget to return the -array at the end of the function. - -(((array,indexing)))(((comparison)))Since the end boundary is -inclusive, you'll need to use the `<=` operator rather than simply `<` -to check for the end of your loop. - -(((arguments object)))To check whether the optional step argument was -given, either check `arguments.length` or compare the value of the -argument to `undefined`. If it wasn't given, simply set it to its -((default value)) (1) at the top of the function. - -(((range function)))(((for loop)))Having `range` understand negative -step values is probably best done by writing two separate loops—one -for counting up and one for counting down—because the comparison that -checks whether the loop is finished needs to be `>=` rather than `<=` -when counting downward. - -It might also be worthwhile to use a different default step, namely, --1, when the end of the range is smaller than the start. That way, -`range(5, 2)` returns something meaningful, rather than getting stuck -in an ((infinite loop)). - -!!hint!! - -=== Reversing an array === - -(((reversing (exercise))))(((reverse -method)))(((array,methods)))Arrays have a method `reverse`, which -changes the array by inverting the order in which its elements appear. -For this exercise, write two functions, `reverseArray` and -`reverseArrayInPlace`. 
The first, `reverseArray`, takes an array as -argument and produces a _new_ array that has the same elements in the -inverse order. The second, `reverseArrayInPlace`, does what the -`reverse` method does: it modifies the array given as argument in -order to reverse its elements. Neither may use the standard -`reverse` method. - -(((efficiency)))(((pure function)))(((side effect)))Thinking back to -the notes about side effects and pure functions in the -link:03_functions.html#pure[previous chapter], which variant do you -expect to be useful in more situations? Which one is more efficient? - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -console.log(reverseArray(["A", "B", "C"])); -// → ["C", "B", "A"] -var arrayValue = [1, 2, 3, 4, 5]; -reverseArrayInPlace(arrayValue); -console.log(arrayValue); -// → [5, 4, 3, 2, 1] ----- -endif::interactive_target[] - -!!hint!! - -(((reversing (exercise))))There are two obvious ways to implement -`reverseArray`. The first is to simply go over the input array from -front to back and use the `unshift` method on the new array to insert -each element at its start. The second is to loop over the input array -backward and use the `push` method. Iterating over an array backward -requires a (somewhat awkward) `for` specification like `(var i = -array.length - 1; i >= 0; i--)`. - -Reversing the array in place is harder. You have to be careful not to -overwrite elements that you will later need. Using `reverseArray` or -otherwise copying the whole array (`array.slice(0)` is a good way to -copy an array) works but is cheating. - -The trick is to _swap_ the first and last elements, then the -second and second-to-last, and so on. You can do this by looping -over half the length of the array (use `Math.floor` to round down—you -don't need to touch the middle element in an array with an odd -length) and swapping the element at position `i` with the one at -position `array.length - 1 - i`.
You can use a local variable to -briefly hold on to one of the elements, overwrite that one with its -mirror image, and then put the value from the local variable in the -place where the mirror image used to be. - -!!hint!! - -[[list]] -=== A list === - -(((data structure)))(((list (exercise))))(((linked -list)))(((object)))(((array)))(((collection)))Objects, as generic -blobs of values, can be used to build all sorts of data structures. A -common data structure is the _list_ (not to be confused with the -array). A list is a nested set of objects, with the first object -holding a reference to the second, the second to the third, and so on. - -// include_code - -[source,javascript] ----- -var list = { - value: 1, - rest: { - value: 2, - rest: { - value: 3, - rest: null - } - } -}; ----- - -The resulting objects form a chain, like this: - -image::img/linked-list.svg[alt="A linked list",width="6cm"] - -(((structure sharing)))(((memory)))A nice thing about lists is that -they can share parts of their structure. For example, if I create two -new values `{value: 0, rest: list}` and `{value: -1, rest: list}` -(with `list` referring to the variable defined earlier), they are both -independent lists, but they share the structure that makes up their -last three elements. In addition, the original list is also still a -valid three-element list. - -Write a function `arrayToList` that builds up a data structure like -the previous one when given `[1, 2, 3]` as argument, and write a -`listToArray` function that produces an array from a list. Also write -the helper functions `prepend`, which takes an element and a list and -creates a new list that adds the element to the front of the input -list, and `nth`, which takes a list and a number and returns the -element at the given position in the list, or `undefined` when there -is no such element. - -(((recursion)))If you haven't already, also write a recursive version -of `nth`. 
- -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -console.log(arrayToList([10, 20])); -// → {value: 10, rest: {value: 20, rest: null}} -console.log(listToArray(arrayToList([10, 20, 30]))); -// → [10, 20, 30] -console.log(prepend(10, prepend(20, null))); -// → {value: 10, rest: {value: 20, rest: null}} -console.log(nth(arrayToList([10, 20, 30]), 1)); -// → 20 ----- -endif::interactive_target[] - -!!hint!! - -(((list (exercise))))(((linked list)))Building up a list is best done -back to front. So `arrayToList` could iterate over the array backward -(see previous exercise) and, for each element, add an object to the -list. You can use a local variable to hold the part of the list that -was built so far and use a pattern like `list = {value: X, rest: -list}` to add an element. - -(((for loop)))To run over a list (in `listToArray` and `nth`), a `for` -loop specification like this can be used: - -[source,javascript] ----- -for (var node = list; node; node = node.rest) {} ----- - -Can you see how that works? Every iteration of the loop, `node` points -to the current sublist, and the body can read its `value` property to -get the current element. At the end of an iteration, `node` moves to -the next sublist. When that is null, we have reached the end of the -list and the loop is finished. - -(((recursion)))The recursive version of `nth` will, similarly, look at -an ever smaller part of the “tail” of the list and at the same time -count down the index until it reaches zero, at which point it can -return the `value` property of the node it is looking at. To get the -zeroth element of a list, you simply take the `value` property of its -head node. To get element _N_ + 1, you take the __N__th element of the -list that's in this list's `rest` property. - -!!hint!!
- -[[exercise_deep_compare]] -=== Deep comparison === - -(((deep comparison (exercise))))(((comparison)))(((deep -comparison)))(((== operator)))The `==` operator compares objects by -identity. But sometimes, you would prefer to compare the values of -their actual properties. - -Write a function, `deepEqual`, that takes two values and returns true -only if they are the same value or are objects with the same -properties whose values are also equal when compared with a recursive -call to `deepEqual`. - -(((null)))(((=== operator)))(((typeof operator)))To find out whether -to compare two things by identity (use the `===` operator for that) or -by looking at their properties, you can use the `typeof` operator. If -it produces `"object"` for both values, you should do a deep -comparison. But you have to take one silly exception into account: by -a historical accident, `typeof null` also produces `"object"`. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -var obj = {here: {is: "an"}, object: 2}; -console.log(deepEqual(obj, obj)); -// → true -console.log(deepEqual(obj, {here: 1, object: 2})); -// → false -console.log(deepEqual(obj, {here: {is: "an"}, object: 2})); -// → true ----- -endif::interactive_target[] - -!!hint!! - -(((deep comparison (exercise))))(((typeof operator)))(((object)))(((=== operator)))Your test for whether you are dealing with a -real object will look something like `typeof x == "object" && x != -null`. Be careful to compare properties only when _both_ arguments are -objects. In all other cases you can just immediately return the result -of applying `===`. - -(((for/in loop)))(((in operator)))Use a `for`/`in` loop to go over the -properties. You need to test whether both objects have the same set of -property names and whether those properties have identical values. The -first test can be done by counting the properties in both objects and -returning false if the numbers of properties are different. 
If they're -the same, then go over the properties of one object, and for each of -them, verify that the other object also has the property. The values -of the properties are compared by a recursive call to `deepEqual`. - -(((return value)))Returning the correct value from the function is -best done by immediately returning false when a mismatch is noticed -and returning true at the end of the function. - -!!hint!! diff --git a/05_higher_order.txt b/05_higher_order.txt deleted file mode 100644 index b1c35a600..000000000 --- a/05_higher_order.txt +++ /dev/null @@ -1,1100 +0,0 @@ -:chap_num: 5 -:prev_link: 04_data -:next_link: 06_object -:load_files: ["code/ancestry.js", "code/chapter/05_higher_order.js", "code/intro.js"] -:zip: node/html - -= Higher-Order Functions = - -ifdef::interactive_target[] - -[chapterquote="true"] -[quote, Master Yuan-Ma, The Book of Programming] -____ -Tzu-li and Tzu-ssu were -boasting about the size of their latest programs. ‘Two-hundred -thousand lines,’ said Tzu-li, ‘not counting comments!’ Tzu-ssu -responded, ‘Pssh, mine is almost a *million* lines already.’ Master -Yuan-Ma said, ‘My best program has five hundred lines.’ Hearing this, -Tzu-li and Tzu-ssu were enlightened. -____ - -endif::interactive_target[] - -[chapterquote="true"] -[quote, C.A.R. Hoare, 1980 ACM Turing Award Lecture] -____ -(((Hoare+++,+++ C.A.R.)))There are two ways of constructing a software -design: One way is to make it so simple that there are obviously no -deficiencies, and the other way is to make it so complicated that -there are no obvious deficiencies. -____ - -(((program size)))A large program is a costly program, and not just -because of the time it takes to build. Size almost always involves -((complexity)), and complexity confuses programmers. Confused -programmers, in turn, tend to introduce mistakes (_((bug))s_) into -programs. A large program also provides a lot of space for these bugs -to hide, making them hard to find. 
- -(((summing example)))Let's briefly go back to the final two example -programs in the introduction. The first is self-contained and six -lines long. - -[source,javascript] ----- -var total = 0, count = 1; -while (count <= 10) { - total += count; - count += 1; -} -console.log(total); ----- - -The second relies on two external functions and is one line long. - -[source,javascript] ----- -console.log(sum(range(1, 10))); ----- - -Which one is more likely to contain a bug? - -(((program size)))If we count the size of the definitions of `sum` and -`range`, the second program is also big—even bigger than the first. -But still, I'd argue that it is more likely to be correct. - -(((abstraction)))(((domain-specific language)))It is more likely to -be correct because the solution is expressed in a ((vocabulary)) that -corresponds to the problem being solved. Summing a range of -numbers isn't about loops and counters. It is about ranges and sums. - -The definitions of this vocabulary (the functions `sum` and `range`) -will still involve loops, counters, and other incidental details. But -because they are expressing simpler concepts than the program as a -whole, they are easier to get right. - -== Abstraction == - -In the context of programming, these kinds of vocabularies are usually -called _((abstraction))s_. Abstractions hide details and give us the -ability to talk about problems at a higher (or more abstract) level. - -(((recipe analogy)))(((pea soup)))As an analogy, compare these two -recipes for pea soup: - -____ -Put 1 cup of dried peas per person into a container. Add water until -the peas are well covered. Leave the peas in water for at least 12 hours. -Take the peas out of the water and put them in a cooking pan. Add 4 -cups of water per person. Cover the pan and keep the peas -simmering for two hours. Take half an onion per person. Cut it into -pieces with a knife. Add it to the peas. Take a stalk of celery per -person. Cut it into pieces with a knife. 
Add it to the peas. Take a -carrot per person. Cut it into pieces. With a knife! Add it to the -peas. Cook for 10 more minutes. -____ - -And the second recipe: - -____ -Per person: 1 cup dried split peas, half a chopped onion, a stalk of -celery, and a carrot. - -Soak peas for 12 hours. Simmer for 2 hours in 4 cups of water -(per person). Chop and add vegetables. Cook for 10 more minutes. -____ - -(((vocabulary)))The second is shorter and easier to interpret. But -you do need to understand a few more cooking-related words—__soak__, -_simmer_, _chop_, and, I guess, _vegetable_. - -When programming, we can't rely on all the words we need to be waiting -for us in the dictionary. Thus, you might fall into the pattern of the -first recipe—work out the precise steps the computer has to perform, -one by one, blind to the higher-level concepts that they express. - -(((abstraction)))It has to become second nature, for a programmer, to -notice when a concept is begging to be abstracted into a new word. - -== Abstracting array traversal == - -(((array)))Plain functions, as we've seen them so far, are a good -way to build abstractions. But sometimes they fall short. - -(((for loop)))In the link:04_data.html#data[previous chapter], this -type of `for` ((loop)) made several appearances: - -[source,javascript] ----- -var array = [1, 2, 3]; -for (var i = 0; i < array.length; i++) { - var current = array[i]; - console.log(current); -} ----- - -(((length property,for -array)))(((array,indexing)))(((readability)))It's trying to say, “For -each element in the array, log it to the console”. But it uses a -roundabout way that involves a counter variable `i`, a check against -the array's length, and an extra variable declaration to pick out the -current element. Apart from being a bit of an eyesore, this provides a -lot of space for potential mistakes. We might accidentally reuse the -`i` variable, misspell `length` as `lenght`, confuse the `i` and `current` -variables, and so on. 
- -So let's try to abstract this into a function. Can you think of a way? - -Well, it's easy to write a function that goes over an array and calls -`console.log` on every element. - -[source,javascript] ----- -function logEach(array) { - for (var i = 0; i < array.length; i++) - console.log(array[i]); -} ----- - -[[forEach]] -indexsee:[higher-order function,function+++,+++ higher-order](((function,higher-order)))(((loop)))(((array,traversal)))(((function,as value)))(((forEach method)))But what -if we want to do something other than logging the elements? Since -“doing something” can be represented as a function and functions are -just values, we can pass our action as a function value. - -[source,javascript] ----- -function forEach(array, action) { - for (var i = 0; i < array.length; i++) - action(array[i]); -} - -forEach(["Wampeter", "Foma", "Granfalloon"], console.log); -// → Wampeter -// → Foma -// → Granfalloon ----- - -(In some browsers, calling `console.log` in this way does not work. -You can use `alert` instead of `console.log` if this example fails to -work.) - -Often, you don't pass a predefined function to `forEach` but create -a function value on the spot instead. - -[source,javascript] ----- -var numbers = [1, 2, 3, 4, 5], sum = 0; -forEach(numbers, function(number) { - sum += number; -}); -console.log(sum); -// → 15 ----- - -(((loop body)))(((curly braces)))This looks quite a lot like the -classical `for` loop, with its body written as a block below it. -However, now the body is inside the function value, as well as -inside the ((parentheses)) of the call to `forEach`. This is why it -has to be closed with the closing brace _and_ closing parenthesis. - -(((local variable)))(((parameter)))Using this pattern, we can -specify a variable name for the current element (`number`), rather -than having to pick it out of the array manually. 
- -(((array,methods)))(((function,higher-order)))(((forEach -method)))(((array)))In fact, we don't need to write `forEach` -ourselves. It is available as a standard method on arrays. Since the -array is already provided as the thing the method acts on, `forEach` -takes only one required argument: the function to be executed for each -element. - -To illustrate how helpful this is, let's look back at a function -from link:04_data.html#analysis[the previous chapter]. It contains two -array-traversing ((loop))s. - -[source,javascript] ----- -function gatherCorrelations(journal) { - var phis = {}; - for (var entry = 0; entry < journal.length; entry++) { - var events = journal[entry].events; - for (var i = 0; i < events.length; i++) { - var event = events[i]; - if (!(event in phis)) - phis[event] = phi(tableFor(event, journal)); - } - } - return phis; -} ----- - -(((forEach method)))Working with `forEach` makes it slightly shorter -and quite a bit cleaner. - -[source,javascript] ----- -function gatherCorrelations(journal) { - var phis = {}; - journal.forEach(function(entry) { - entry.events.forEach(function(event) { - if (!(event in phis)) - phis[event] = phi(tableFor(event, journal)); - }); - }); - return phis; -} ----- - -== Higher-order functions == - -(((function,higher-order)))(((function,as value)))Functions that -operate on other functions, either by taking them as arguments or by -returning them, are called _higher-order functions_. If you have -already accepted the fact that functions are regular values, there is -nothing particularly remarkable about the fact that such functions -exist. The term comes from ((mathematics)), where the distinction -between functions and other values is taken more seriously. - -(((abstraction)))Higher-order functions allow us to abstract over -_actions_, not just values. They come in several forms. For example, -you can have functions that create new functions. 
- -[source,javascript] ----- -function greaterThan(n) { - return function(m) { return m > n; }; -} -var greaterThan10 = greaterThan(10); -console.log(greaterThan10(11)); -// → true ----- - -And you can have functions that change other functions. - -[source,javascript] ----- -function noisy(f) { - return function(arg) { - console.log("calling with", arg); - var val = f(arg); - console.log("called with", arg, "- got", val); - return val; - }; -} -noisy(Boolean)(0); -// → calling with 0 -// → called with 0 - got false ----- - -You can even write functions that provide new types of ((control flow)). - -[source,javascript] ----- -function unless(test, then) { - if (!test) then(); -} -function repeat(times, body) { - for (var i = 0; i < times; i++) body(i); -} - -repeat(3, function(n) { - unless(n % 2, function() { - console.log(n, "is even"); - }); -}); -// → 0 is even -// → 2 is even ----- - -(((inner function)))(((nesting,of functions)))((({} -(block))))(((local variable)))(((closure)))The ((lexical scoping)) -rules that we discussed in link:03_functions.html#scoping[Chapter 3] -work to our advantage when using functions in this way. In the previous example, the `n` variable is a ((parameter)) to the outer function. -Because the inner function lives inside the environment of the outer -one, it can use `n`. The bodies of such inner functions can access the -variables around them. They can play a role similar to the `{}` blocks -used in regular loops and conditional statements. An important -difference is that variables declared inside inner functions do not -end up in the environment of the outer function. And that is usually a -good thing. - -== Passing along arguments == - -(((function,wrapping)))(((arguments object)))The `noisy` function -defined earlier, which wraps its argument in another function, has a rather -serious deficit. 
- -[source,javascript] ----- -function noisy(f) { - return function(arg) { - console.log("calling with", arg); - var val = f(arg); - console.log("called with", arg, "- got", val); - return val; - }; -} ----- - -If `f` takes more than one ((parameter)), it gets only the first one. -We could add a bunch of arguments to the inner function (`arg1`, -`arg2`, and so on) and pass them all to `f`, but it is not clear how many -would be enough. This solution would also deprive `f` of the -information in `arguments.length`. Since we'd always pass the same -number of arguments, it wouldn't know how many arguments were -originally given. - -(((apply method)))(((array-like object)))(((function,application)))For -these kinds of situations, JavaScript functions have an `apply` -method. You pass it an array (or array-like object) of arguments, and -it will call the function with those arguments. - -[source,javascript] ----- -function transparentWrapping(f) { - return function() { - return f.apply(null, arguments); - }; -} ----- - -(((null)))That's a useless function, but it shows the pattern we are -interested in—the function it returns passes all of the given -arguments, and only those arguments, to `f`. It does this by passing -its own `arguments` object to `apply`. The first argument to `apply`, -for which we are passing `null` here, can be used to simulate a -((method)) call. We will come back to that in the -link:06_object.html#call_method[next chapter]. - -== JSON == - -(((array)))(((function,higher-order)))(((forEach method)))(((data -set)))Higher-order functions that somehow apply a function to the -elements of an array are widely used in JavaScript. The `forEach` -method is the most primitive such function. There are a number of -other variants available as methods on arrays. To familiarize -ourselves with them, let's play around with another data set. 
- -(((ancestry example)))A few years ago, someone crawled through a lot -of archives and put together a book on the history of my family name -(Haverbeke—meaning Oatbrook). I opened it hoping to find -knights, pirates, and alchemists ... but the book turns out to be -mostly full of Flemish ((farmer))s. For my amusement, I extracted the -information on my direct ancestors and put it into a -computer-readable format. - -(((data format)))(((JSON)))The file I created looks something like -this: - -[source,application/json] ----- -[ - {"name": "Emma de Milliano", "sex": "f", - "born": 1876, "died": 1956, - "father": "Petrus de Milliano", - "mother": "Sophia van Damme"}, - {"name": "Carolus Haverbeke", "sex": "m", - "born": 1832, "died": 1905, - "father": "Carel Haverbeke", - "mother": "Maria van Brussel"}, - … and so on -] ----- - -indexsee:[JavaScript Object Notation,JSON](((World Wide Web)))This format is called JSON (pronounced “Jason”), -which stands for JavaScript Object Notation. It is widely used as a -data storage and communication format on the Web. - -(((array)))(((object)))(((quoting,in JSON)))(((comment)))JSON is similar to -JavaScript's way of writing arrays and objects, with a few -restrictions. All property names have to be surrounded by double quotes, and -only simple data expressions are allowed—no function calls, -variables, or anything that involves actual computation. Comments are not -allowed in JSON. - -(((JSON.stringify function)))(((JSON.parse -function)))(((serialization)))(((deserialization)))(((parsing)))JavaScript -gives us functions, `JSON.stringify` and `JSON.parse`, that convert -data to and from this format. The first takes a JavaScript value and -returns a JSON-encoded string. The second takes such a string and -converts it to the value it encodes. 
- -[source,javascript] ----- -var string = JSON.stringify({name: "X", born: 1980}); -console.log(string); -// → {"name":"X","born":1980} -console.log(JSON.parse(string).born); -// → 1980 ----- - -(((ANCESTRY_FILE data set)))The variable `ANCESTRY_FILE`, available in -the ((sandbox)) for this chapter and in -http://eloquentjavascript.net/code/ancestry.js[a downloadable file] on -the website(!book (http://eloquentjavascript.net/code#5[_eloquentjavascript.net/code#5_])!), contains the -content of my ((JSON)) file as a string. Let's decode it and see how -many people it contains. - -// include_code strip_log - -[source,javascript] ----- -var ancestry = JSON.parse(ANCESTRY_FILE); -console.log(ancestry.length); -// → 39 ----- - -== Filtering an array == - -(((array,methods)))(((array,filtering)))(((filter -method)))(((function,higher-order)))(((predicate function)))To find -the people in the ancestry data set who were young in 1924, the -following function might be helpful. It filters out the elements in an -array that don't pass a test. - -[source,javascript] ----- -function filter(array, test) { - var passed = []; - for (var i = 0; i < array.length; i++) { - if (test(array[i])) - passed.push(array[i]); - } - return passed; -} - -console.log(filter(ancestry, function(person) { - return person.born > 1900 && person.born < 1925; -})); -// → [{name: "Philibert Haverbeke", …}, …] ----- - -(((function,as value)))(((function,application)))This uses the -argument named `test`, a function value, to fill in a “gap” in the -computation. The `test` function is called for each element, and its -return value determines whether an element is included in the returned -array. - -(((ancestry example)))Three people in the file were alive and young in -1924: my grandfather, grandmother, and great-aunt. 
- -(((filter method)))(((pure function)))(((side effect)))Note how the -`filter` function, rather than deleting elements from the existing -array, builds up a new array with only the elements that pass the -test. This function is _pure_. It does not modify the array it is -given. - -Like `forEach`, `filter` is also a ((standard)) method on arrays. The -example defined the function only in order to show what it does -internally. From now on, we'll use it like this instead: - -[source,javascript] ----- -console.log(ancestry.filter(function(person) { - return person.father == "Carel Haverbeke"; -})); -// → [{name: "Carolus Haverbeke", …}] ----- - -== Transforming with map == - -(((array,methods)))(((map method)))(((ancestry example)))Say we -have an array of objects representing people, produced by filtering -the `ancestry` array somehow. But we want an array of names, which is -easier to read. - -(((function,higher-order)))The `map` method transforms an array by -applying a function to all of its elements and building a new array -from the returned values. The new array will have the same length as -the input array, but its content will have been “mapped” to a new form -by the function. - -// test: join - -[source,javascript] ----- -function map(array, transform) { - var mapped = []; - for (var i = 0; i < array.length; i++) - mapped.push(transform(array[i])); - return mapped; -} - -var overNinety = ancestry.filter(function(person) { - return person.died - person.born > 90; -}); -console.log(map(overNinety, function(person) { - return person.name; -})); -// → ["Clara Aernoudts", "Emile Haverbeke", -// "Maria Haverbeke"] ----- - -Interestingly, the people who lived to at least 90 years of age are the -same three people who we saw before—the people who were young in the -1920s, which happens to be the most recent generation in my data set. -I guess ((medicine)) has come a long way. - -Like `forEach` and `filter`, `map` is also a standard method on -arrays. 
- -== Summarizing with reduce == - -(((array,methods)))(((summing example)))(((reduce method)))(((ancestry -example)))Another common pattern of computation on arrays is computing -a single value from them. Our recurring example, summing a collection -of numbers, is an instance of this. Another example would be finding -the person with the earliest year of birth in the data set. - -(((function,higher-order)))(((fold function)))The higher-order -operation that represents this pattern is called _reduce_ (or -sometimes _fold_). You can think of it as folding up the array, one -element at a time. When summing numbers, you'd start with the number -zero and, for each element, combine it with the current sum by adding -the two. - -The parameters to the `reduce` function are, apart from the array, a -combining function and a start value. This function is a little less -straightforward than `filter` and `map`, so pay close attention. - -[source,javascript] ----- -function reduce(array, combine, start) { - var current = start; - for (var i = 0; i < array.length; i++) - current = combine(current, array[i]); - return current; -} - -console.log(reduce([1, 2, 3, 4], function(a, b) { - return a + b; -}, 0)); -// → 10 ----- - -(((reduce method)))The standard array method `reduce`, which of course -corresponds to this function, has an added convenience. If your array -contains at least one element, you are allowed to leave off the -`start` argument. The method will take the first element of the array -as its start value and start reducing at the second element. 
- -(((ancestry example)))(((minimum)))To use `reduce` to find my most -ancient known ancestor, we can write something like this: - -// test: no - -[source,javascript] ----- -console.log(ancestry.reduce(function(min, cur) { - if (cur.born < min.born) return cur; - else return min; -})); -// → {name: "Pauwels van Haverbeke", born: 1535, …} ----- - -== Composability == - -(((loop)))(((minimum)))(((ancestry example)))Consider how we would -have written the previous example (finding the person with the -earliest year of birth) without higher-order functions. The code is -not that much worse. - -// test: no - -[source,javascript] ----- -var min = ancestry[0]; -for (var i = 1; i < ancestry.length; i++) { - var cur = ancestry[i]; - if (cur.born < min.born) - min = cur; -} -console.log(min); -// → {name: "Pauwels van Haverbeke", born: 1535, …} ----- - -There are a few more ((variable))s, and the program is two lines -longer but still quite easy to understand. - -[[average_function]] -(((average -function)))(((composability)))(((function,higher-order)))Higher-order -functions start to shine when you need to _compose_ functions. As an -example, let's write code that finds the average age for men and for -women in the data set. - -// test: clip - -[source,javascript] ----- -function average(array) { - function plus(a, b) { return a + b; } - return array.reduce(plus) / array.length; -} -function age(p) { return p.died - p.born; } -function male(p) { return p.sex == "m"; } -function female(p) { return p.sex == "f"; } - -console.log(average(ancestry.filter(male).map(age))); -// → 61.67 -console.log(average(ancestry.filter(female).map(age))); -// → 54.56 ----- - -(((plus function)))(((+ operator)))(((function,as value)))(It's a bit -silly that we have to define `plus` as a function, but operators in -JavaScript, unlike functions, are not values, so you can't pass them -as arguments.) 
- -(((abstraction)))(((vocabulary)))Instead of tangling the logic into a -big ((loop)), it is neatly composed into the concepts we are -interested in—determining sex, computing age, and averaging numbers. We -can apply these one by one to get the result we are looking for. - -This is _fabulous_ for writing clear code. Unfortunately, this clarity -comes at a cost. - -== The cost == - -(((efficiency)))(((optimization)))In the happy land of elegant code -and pretty rainbows, there lives a spoil-sport monster called -_inefficiency_. - -(((elegance)))(((array,creation)))(((pure -function)))(((composability)))A program that processes an array is most -elegantly expressed as a sequence of cleanly separated steps that each -do something with the array and produce a new array. But building up -all those intermediate arrays is somewhat expensive. - -(((readability)))(((function,application)))(((forEach -method)))(((function,as value)))Likewise, passing a function to -`forEach` and letting that method handle the array iteration for us is -convenient and easy to read. But function calls in JavaScript are -costly compared to simple loop bodies. - -(((abstraction)))And so it goes with a lot of techniques that help -improve the clarity of a program. Abstractions add layers between the -raw things the computer is doing and the concepts we are working with -and thus cause the machine to perform more work. This is not an iron -law—there are programming languages that have better support for -building abstractions without adding inefficiencies, and even in -JavaScript, an experienced programmer can find ways to write abstract -code that is still fast. But it is a problem that comes up a lot. - -(((profiling)))Fortunately, most computers are insanely fast. 
If you -are processing a modest set of data or doing something that has -to happen only on a human time scale (say, every time the user clicks a -button), then it _does not matter_ whether you wrote a pretty solution -that takes half a millisecond or a super-optimized solution that takes -a tenth of a millisecond. - -(((nesting,of loops)))(((inner loop)))(((complexity)))It is helpful to -roughly keep track of how often a piece of your program is going to -run. If you have a ((loop)) inside a loop (either directly or through -the outer loop calling a function that ends up performing the inner -loop), the code inside the inner loop will end up running __N__×__M__ -times, where _N_ is the number of times the outer loop repeats and -_M_ is the number of times the inner loop repeats within each iteration -of the outer loop. If that inner loop contains another loop that makes -_P_ rounds, its body will run __M__×__N__×__P__ times, and so on. This -can add up to large numbers, and when a program is slow, the problem -can often be traced to only a small part of the code, which sits inside an inner loop. - -== Great-great-great-great-... == - -(((ancestry example)))My ((grandfather)), Philibert Haverbeke, is -included in the data file. By starting with him, I can trace my -lineage to find out whether the most ancient person in the data, -Pauwels van Haverbeke, is my direct ancestor. And if he is, I would -like to know how much ((DNA)) I theoretically share with him. - -(((byName object)))(((map)))(((data structure)))(((object,as -map)))To be able to go from a parent's name to the actual object that -represents this person, we first build up an object that associates -names with people. 
- -// include_code strip_log - -[source,javascript] ----- -var byName = {}; -ancestry.forEach(function(person) { - byName[person.name] = person; -}); - -console.log(byName["Philibert Haverbeke"]); -// → {name: "Philibert Haverbeke", …} ----- - -Now, the problem is not entirely as simple as following the `father` -properties and counting how many we need to reach Pauwels. There are -several cases in the family ((tree)) where people married their second -cousins (tiny villages and all that). This causes the branches of the -family tree to rejoin in a few places, which means I share more than -1/2^_G_^ of my genes with this person, where _G_ stands for the number of -generations between Pauwels and me. This formula comes from the idea -that each generation splits the gene pool in two. - -(((reduce method)))(((data structure)))A reasonable way to think about -this problem is to look at it as being analogous to `reduce`, which -condenses an array to a single value by repeatedly combining -values, left to right. In this case, we also want to condense our data -structure to a single value but in a way that follows family -lines. The _shape_ of the data is that of a family tree, rather than a -flat list. - -The way we want to reduce this shape is by computing a value for a -given person by combining values from their ancestors. This can be -done recursively: if we are interested in person _A_, we have to -compute the values for __A__’s parents, which in turn requires us to -compute the value for __A__’s grandparents, and so on. In principle, -that'd require us to look at an infinite number of people, but since -our data set is finite, we have to stop somewhere. We'll allow a -((default value)) to be given to our reduction function, which will be -used for people who are not in the data. In our case, that value is -simply zero, on the assumption that people not in the list don't share -DNA with the ancestor we are looking at.
- -(((recursion)))(((reduceAncestors function)))Given a person, a -function to combine values from the two parents of a given person, and -a default value, `reduceAncestors` condenses a value from a family -tree. - -// include_code - -[source,javascript] ----- -function reduceAncestors(person, f, defaultValue) { - function valueFor(person) { - if (person == null) - return defaultValue; - else - return f(person, valueFor(byName[person.mother]), - valueFor(byName[person.father])); - } - return valueFor(person); -} ----- - -(((function,higher-order)))The inner function (`valueFor`) handles a -single person. Through the ((magic)) of recursion, it can simply call -itself to handle the father and the mother of this person. The -results, along with the person object itself, are passed to `f`, which -returns the actual value for this person. - -We can then use this to compute the amount of ((DNA)) my -((grandfather)) shared with Pauwels van Haverbeke and divide that by -four. - -// start_code bottom_lines: 2 -// test: clip -// include_code top_lines: 6 - -[source,javascript] ----- -function sharedDNA(person, fromMother, fromFather) { - if (person.name == "Pauwels van Haverbeke") - return 1; - else - return (fromMother + fromFather) / 2; -} -var ph = byName["Philibert Haverbeke"]; -console.log(reduceAncestors(ph, sharedDNA, 0) / 4); -// → 0.00049 ----- - -The person with the name Pauwels van Haverbeke obviously shared 100 percent -of his DNA with Pauwels van Haverbeke (there are no people who share -names in the data set), so the function returns 1 for him. All other -people share the average of the amounts that their parents share. - -So, statistically speaking, I share about 0.05 percent of my ((DNA)) with -this 16th-century person. It should be noted that this is only a -statistical approximation, not an exact amount. 
It is a rather small -number, but given how much genetic material we carry (about 3 billion -base pairs), there's still probably some aspect in the biological -machine that is me that originates with Pauwels. - -(((ancestry example)))(((reduceAncestors -function)))(((abstraction)))We could also have computed this number -without relying on `reduceAncestors`. But separating the general -approach (condensing a family tree) from the specific case (computing -shared DNA) can improve the clarity of the code and allows us to reuse -the abstract part of the program for other cases. For example, the -following code finds the percentage of a person's known ancestors who -lived past 70 (by lineage, so people may be counted multiple times): - -// test: clip - -[source,javascript] ----- -function countAncestors(person, test) { - function combine(current, fromMother, fromFather) { - var thisOneCounts = current != person && test(current); - return fromMother + fromFather + (thisOneCounts ? 1 : 0); - } - return reduceAncestors(person, combine, 0); -} -function longLivingPercentage(person) { - var all = countAncestors(person, function(person) { - return true; - }); - var longLiving = countAncestors(person, function(person) { - return (person.died - person.born) >= 70; - }); - return longLiving / all; -} -console.log(longLivingPercentage(byName["Emile Haverbeke"])); -// → 0.129 ----- - -Such numbers are not to be taken too seriously, given that -our data set contains a rather arbitrary collection of people. But the -code illustrates the fact that `reduceAncestors` gives us a useful -piece of ((vocabulary)) for working with the family tree data -structure. - -== Binding == - -(((bind method)))(((partial -application)))(((function,application)))The `bind` method, which all -functions have, creates a new function that will call the original -function but with some of the arguments already fixed. 
- -(((filter method)))(((function,as value)))The following code shows an -example of `bind` in use. It defines a function `isInSet` that -tells us whether a person is in a given set of strings. To call -`filter` in order to collect those person objects whose names are in a -specific set, we can either write a function expression that makes a -call to `isInSet` with our set as its first argument or _partially -apply_ the `isInSet` function. - -[source,javascript] ----- -var theSet = ["Carel Haverbeke", "Maria van Brussel", - "Donald Duck"]; -function isInSet(set, person) { - return set.indexOf(person.name) > -1; -} - -console.log(ancestry.filter(function(person) { - return isInSet(theSet, person); -})); -// → [{name: "Maria van Brussel", …}, -// {name: "Carel Haverbeke", …}] -console.log(ancestry.filter(isInSet.bind(null, theSet))); -// → … same result ----- - -The call to `bind` returns a function that will call `isInSet` with -`theSet` as first argument, followed by any remaining arguments given -to the bound function. - -(((null)))The first argument, where the example passes `null`, is used -for ((method call))s, similar to the first argument to `apply`. I'll -describe this in more detail in the -link:06_object.html#call_method[next chapter]. - -== Summary == - -Being able to pass function values to other functions is not just a -gimmick but a deeply useful aspect of JavaScript. It allows us to -write computations with “gaps” in them as functions and have the code -that calls these functions fill in those gaps by providing function -values that describe the missing computations. - -Arrays provide a number of useful higher-order methods—`forEach` -to do something with each element in an array, `filter` to build a new -array with some elements filtered out, `map` to build a new array -where each element has been put through a function, and `reduce` to -combine all an array's elements into a single value. 
- -Functions have an `apply` method that can be used to call them with an -array specifying their arguments. They also have a `bind` method, -which is used to create a partially applied version of the function. - -== Exercises == - -=== Flattening === - -(((flattening (exercise))))(((reduce method)))(((concat -method)))(((array)))Use the `reduce` method in combination with -the `concat` method to “flatten” an array of arrays into a single -array that has all the elements of the input arrays. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -var arrays = [[1, 2, 3], [4, 5], [6]]; -// Your code here. -// → [1, 2, 3, 4, 5, 6] ----- -endif::interactive_target[] - -=== Mother-child age difference === - -(((ancestry example)))(((age difference (exercise))))(((average -function)))Using the example data set from this chapter, compute the -average age difference between mothers and children (the age of the -mother when the child is born). You can use the `average` function -defined link:05_higher_order.html#average_function[earlier] in this -chapter. - -(((byName object)))Note that not all the mothers mentioned in the data -are themselves present in the array. The `byName` object, which makes -it easy to find a person's object from their name, might be useful -here. - -ifdef::interactive_target[] - -// test: no -// include_code - -[source,javascript] ----- -function average(array) { - function plus(a, b) { return a + b; } - return array.reduce(plus) / array.length; -} - -var byName = {}; -ancestry.forEach(function(person) { - byName[person.name] = person; -}); - -// Your code here. - -// → 31.2 ----- -endif::interactive_target[] - -!!hint!! 
- -(((age difference (exercise))))(((filter method)))(((map -method)))(((null)))(((average function)))Because not all elements in -the `ancestry` array produce useful data (we can't compute the age -difference unless we know the birth date of the mother), we will have -to apply `filter` in some manner before calling `average`. You could -do it as a first pass, by defining a `hasKnownMother` function and -filtering on that first. Alternatively, you could start by calling -`map` and in your mapping function return either the age difference -or `null` if no mother is known. Then, you can call `filter` to remove -the `null` elements before passing the array to `average`. - -!!hint!! - -=== Historical life expectancy === - -(((life expectancy (exercise))))When we looked up all the people in -our data set that lived more than 90 years, only the latest -generation in the data came out. Let's take a closer look at that -phenomenon. - -(((average function)))Compute and output the average age of the people -in the ancestry data set per century. A person is assigned to a -((century)) by taking their year of death, dividing it by 100, -and rounding it up, as in `Math.ceil(person.died / 100)`. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -function average(array) { - function plus(a, b) { return a + b; } - return array.reduce(plus) / array.length; -} - -// Your code here. - -// → 16: 43.5 -// 17: 51.2 -// 18: 52.8 -// 19: 54.8 -// 20: 84.7 -// 21: 94 ----- -endif::interactive_target[] - -!!hint!! - -(((life expectancy (exercise))))The essence of this example lies in -((grouping)) the elements of a collection by some aspect of -theirs—splitting the array of ancestors into smaller arrays with the -ancestors for each century. - -(((array)))(((map)))(((object,as map)))During the grouping -process, keep an object that associates ((century)) names (numbers) -with arrays of either person objects or ages. 
Since we do not know in -advance what categories we will find, we'll have to create them on the -fly. For each person, after computing their century, we test whether -that century was already known. If not, add an array for it. Then add -the person (or age) to the array for the proper century. - -(((for/in loop)))(((average function)))Finally, a `for`/`in` loop can -be used to print the average ages for the individual centuries. - -!!hint!! - -(((grouping)))(((map)))(((object,as map)))(((groupBy function)))For -bonus points, write a function `groupBy` that abstracts the grouping -operation. It should accept as arguments an array and a function that -computes the group for an element in the array and returns an object -that maps group names to arrays of group members. - -=== Every and then some === - -(((predicate function)))(((every and some (exercise))))(((every -method)))(((some method)))(((array,methods)))(((&& operator)))(((|| -operator)))Arrays also come with the standard methods `every` and -`some`. Both take a predicate function that, when called with an array -element as argument, returns true or false. Just like `&&` -returns a true value only when the expressions on both sides are true, -`every` returns true only when the predicate returns true for _all_ -elements of the array. Similarly, `some` returns true as soon as the -predicate returns true for _any_ of the elements. They do not process -more elements than necessary—for example, if `some` finds that the -predicate holds for the first element of the array, it will not look -at the values after that. - -Write two functions, `every` and `some`, that behave like these -methods, except that they take the array as their first argument -rather than being a method. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. 
- -console.log(every([NaN, NaN, NaN], isNaN)); -// → true -console.log(every([NaN, NaN, 4], isNaN)); -// → false -console.log(some([NaN, 3, 4], isNaN)); -// → true -console.log(some([2, 3, 4], isNaN)); -// → false ----- -endif::interactive_target[] - -!!hint!! - -(((every and some (exercise))))(((short-circuit evaluation)))(((return -keyword)))The functions can follow a similar pattern to the -link:05_higher_order.html#forEach[definition] of `forEach` at the -start of the chapter, except that they must return immediately (with -the right value) when the predicate function returns false—or true. -Don't forget to put another `return` statement after the loop so that -the function also returns the correct value when it reaches the end of -the array. - -!!hint!! diff --git a/06_object.txt b/06_object.txt deleted file mode 100644 index 82dd87986..000000000 --- a/06_object.txt +++ /dev/null @@ -1,1222 +0,0 @@ -:chap_num: 6 -:prev_link: 05_higher_order -:next_link: 07_elife -:load_files: ["code/mountains.js", "code/chapter/06_object.js"] -:zip: node/html - -= The Secret Life of Objects = - -[chapterquote="true"] -[quote, Joe Armstrong, interviewed in Coders at Work] -____ -The problem with object-oriented languages -is they’ve got all this implicit environment that they carry around -with them. You wanted a banana but what you got was a gorilla holding -the banana and the entire jungle. -____ - -(((Armstrong+++,+++ Joe)))(((object)))(((holy war)))When a programmer -says “object”, this is a loaded term. In my profession, objects are a -way of life, the subject of holy wars, and a beloved buzzword that -still hasn't quite lost its power. - -To an outsider, this is probably a little confusing. Let's start with -a brief ((history)) of objects as a programming construct. - -== History == - -(((isolation)))(((history)))(((object-oriented programming)))(((object)))This story, like most programming stories, starts with the -problem of ((complexity)). 
One philosophy is that complexity can be -made manageable by separating it into small compartments that are -isolated from each other. These compartments have ended up with the -name _objects_. - -[[interface]] -(((complexity)))(((encapsulation)))(((method)))(((interface)))An -object is a hard shell that hides the gooey complexity inside it -and instead offers us a few knobs and connectors (such as ((method))s) -that present an _interface_ through which the object is to be used. -The idea is that the interface is relatively simple and all the -complex things going on _inside_ the object can be ignored when -working with it. - -image::img/object.jpg[alt="A simple interface can hide a lot of complexity.",width="6cm"] - -As an example, you can imagine an object that provides an interface to -an area on your screen. It provides a way to draw shapes or text onto -this area but hides all the details of how these shapes are converted -to the actual pixels that make up the screen. You'd have a set of -methods—for example, ++drawCircle++—and those are the only things you -need to know in order to use such an object. - -(((object-oriented programming)))These ideas were initially worked out -in the 1970s and 1980s and, in the 1990s, were carried up by a huge wave -of ((hype))—the object-oriented programming revolution. Suddenly, -there was a large tribe of people declaring that objects were the -_right_ way to program—and that anything that did not involve objects -was outdated nonsense. - -That kind of zealotry always produces a lot of impractical silliness, -and there has been a sort of counter-revolution since then. In some -circles, objects have a rather bad reputation nowadays. - -I prefer to look at the issue from a practical, rather than -ideological, angle. There are several useful concepts, most -importantly that of _((encapsulation))_ (distinguishing between -internal complexity and external interface), that the object-oriented -culture has popularized. 
These are worth studying. - -This chapter describes JavaScript's rather eccentric take on objects -and the way they relate to some classical object-oriented techniques. - -[[obj_methods]] -== Methods == - -(((rabbit example)))(((method)))(((property)))Methods are simply -properties that hold function values. This is a simple method: - -[source,javascript] ----- -var rabbit = {}; -rabbit.speak = function(line) { - console.log("The rabbit says '" + line + "'"); -}; - -rabbit.speak("I'm alive."); -// → The rabbit says 'I'm alive.' ----- - -(((this)))(((method call)))Usually a method needs to do something with -the object it was called on. When a function is called as a -method—looked up as a property and immediately called, as in -++object.method()++—the special variable `this` in its body will point -to the object that it was called on. - -// test: join -// include_code top_lines:6 - -[source,javascript] ----- -function speak(line) { - console.log("The " + this.type + " rabbit says '" + - line + "'"); -} -var whiteRabbit = {type: "white", speak: speak}; -var fatRabbit = {type: "fat", speak: speak}; - -whiteRabbit.speak("Oh my ears and whiskers, " + - "how late it's getting!"); -// → The white rabbit says 'Oh my ears and whiskers, how -// late it's getting!' -fatRabbit.speak("I could sure use a carrot right now."); -// → The fat rabbit says 'I could sure use a carrot -// right now.' ----- - -(((apply method)))(((bind method)))(((this)))(((rabbit example)))The -code uses the `this` keyword to output the type of rabbit that is -speaking. Recall that the `apply` and `bind` methods both take a first -argument that can be used to simulate method calls. This first -argument is in fact used to give a value to `this`. - -[[call_method]] -(((call method)))There is a method similar to `apply`, called `call`. -It also calls the function it is a method of but takes its arguments -normally, rather than as an array. 
Like `apply` and `bind`, `call` can -be passed a specific `this` value. - -[source,javascript] ----- -speak.apply(fatRabbit, ["Burp!"]); -// → The fat rabbit says 'Burp!' -speak.call({type: "old"}, "Oh my."); -// → The old rabbit says 'Oh my.' ----- - -[[prototypes]] -== Prototypes == - -(((toString method)))Watch closely. - -[source,javascript] ----- -var empty = {}; -console.log(empty.toString); -// → function toString(){…} -console.log(empty.toString()); -// → [object Object] ----- - -(((magic)))I just pulled a property out of an empty object. Magic! - -(((property)))(((object)))Well, not really. I have simply been -withholding information about the way JavaScript objects work. In -addition to their set of properties, almost all objects also have a -_prototype_. A ((prototype)) is another object that is used as a -fallback source of properties. When an object gets a request for a -property that it does not have, its prototype will be searched for the -property, then the prototype's prototype, and so on. - -(((Object prototype)))So who is the ((prototype)) of that empty -object? It is the great ancestral prototype, the entity behind almost -all objects, `Object.prototype`. - -[source,javascript] ----- -console.log(Object.getPrototypeOf({}) == - Object.prototype); -// → true -console.log(Object.getPrototypeOf(Object.prototype)); -// → null ----- - -(((getPrototypeOf function)))As you might expect, the -`Object.getPrototypeOf` function returns the prototype of an object. - -(((toString method)))The prototype relations of JavaScript objects -form a ((tree))-shaped structure, and at the root of this structure -sits `Object.prototype`. It provides a few ((method))s that show up in -all objects, such as `toString`, which converts an object to a string -representation. 
- -(((inheritance)))(((Function prototype)))(((Array -prototype)))(((Object prototype)))Many objects don't directly have -`Object.prototype` as their ((prototype)), but instead have another -object, which provides its own default properties. Functions derive -from `Function.prototype`, and arrays derive from `Array.prototype`. - -[source,javascript] ----- -console.log(Object.getPrototypeOf(isNaN) == - Function.prototype); -// → true -console.log(Object.getPrototypeOf([]) == - Array.prototype); -// → true ----- - -(((Object prototype)))Such a prototype object will itself have a -prototype, often `Object.prototype`, so that it still indirectly -provides methods like `toString`. - -(((getPrototypeOf function)))(((rabbit example)))(((Object.create -function)))The `Object.getPrototypeOf` function obviously returns the -prototype of an object. You can use `Object.create` to create an -object with a specific ((prototype)). - -[source,javascript] ----- -var protoRabbit = { - speak: function(line) { - console.log("The " + this.type + " rabbit says '" + - line + "'"); - } -}; -var killerRabbit = Object.create(protoRabbit); -killerRabbit.type = "killer"; -killerRabbit.speak("SKREEEE!"); -// → The killer rabbit says 'SKREEEE!' ----- - -(((shared property)))The “proto” rabbit acts as a container for the -properties that are shared by all rabbits. An individual rabbit -object, like the killer rabbit, contains properties that apply only to -itself—in this case its type—and derives shared properties from its -prototype. - -[[constructors]] -== Constructors == - -(((new operator)))(((this)))(((return keyword)))(((object,creation)))A more convenient way to create objects that derive -from some shared prototype is to use a _((constructor))_. In -JavaScript, calling a function with the `new` keyword in front of it -causes it to be treated as a constructor. 
The constructor will have -its `this` variable bound to a fresh object, and unless it explicitly -returns another object value, this new object will be returned from -the call. - -An object created with `new` is said to be an _((instance))_ of its -constructor. - -(((rabbit example)))(((capitalization)))Here is a simple constructor -for rabbits. It is a convention to capitalize the names of -constructors so that they are easily distinguished from other -functions. - -// include_code top_lines:6 - -[source,javascript] ----- -function Rabbit(type) { - this.type = type; -} - -var killerRabbit = new Rabbit("killer"); -var blackRabbit = new Rabbit("black"); -console.log(blackRabbit.type); -// → black ----- - -(((prototype property)))(((constructor)))Constructors (in fact, all -functions) automatically get a property named `prototype`, which by -default holds a plain, empty object that derives from -`Object.prototype`. Every instance created with this constructor will -have this object as its ((prototype)). So to add a `speak` method to -rabbits created with the `Rabbit` constructor, we can simply do this: - -// include_code top_lines:4 - -[source,javascript] ----- -Rabbit.prototype.speak = function(line) { - console.log("The " + this.type + " rabbit says '" + - line + "'"); -}; -blackRabbit.speak("Doom..."); -// → The black rabbit says 'Doom...' ----- - -(((prototype property)))(((getPrototypeOf function)))It is important -to note the distinction between the way a prototype is associated with -a constructor (through its `prototype` property) and the way objects -_have_ a prototype (which can be retrieved with -`Object.getPrototypeOf`). The actual prototype of a constructor is -`Function.prototype` since constructors are functions. Its -`prototype` _property_ will be the prototype of instances created -through it but is not its _own_ prototype. 
- -== Overriding derived properties == - -(((shared property)))(((overriding)))When you add a ((property)) to an -object, whether it is present in the prototype or not, the property is -added to the object _itself_, which will henceforth have it as its own -property. If there _is_ a property by the same name in the prototype, -this property will no longer affect the object. The prototype itself -is not changed. - -[source,javascript] ----- -Rabbit.prototype.teeth = "small"; -console.log(killerRabbit.teeth); -// → small -killerRabbit.teeth = "long, sharp, and bloody"; -console.log(killerRabbit.teeth); -// → long, sharp, and bloody -console.log(blackRabbit.teeth); -// → small -console.log(Rabbit.prototype.teeth); -// → small ----- - -(((prototype,diagram)))The following diagram sketches the situation -after this code has run. The `Rabbit` and `Object` ((prototype))s lie -behind `killerRabbit` as a kind of backdrop, where properties that are -not found in the object itself can be looked up. - -image::img/rabbits.svg[alt="Rabbit object prototype schema",width="8cm"] - -(((shared property)))Overriding properties that exist in a prototype -is often a useful thing to do. As the rabbit teeth example shows, it -can be used to express exceptional properties in instances of a more -generic class of objects, while letting the nonexceptional objects -simply take a standard value from their prototype. - -(((toString method)))(((Array prototype)))(((Function prototype)))It -is also used to give the standard function and array prototypes a -different `toString` method than the basic object prototype. - -[source,javascript] ----- -console.log(Array.prototype.toString == - Object.prototype.toString); -// → false -console.log([1, 2].toString()); -// → 1,2 ----- - -(((toString method)))(((join method)))(((call method)))Calling -`toString` on an array gives a result similar to calling `.join(",")` -on it—it puts commas between the values in the array. 
Directly calling -`Object.prototype.toString` with an array produces a different string. -That function doesn't know about arrays, so it simply puts the word -“object” and the name of the type between square brackets. - -[source,javascript] ----- -console.log(Object.prototype.toString.call([1, 2])); -// → [object Array] ----- - -== Prototype interference == - -(((prototype,interference)))(((rabbit example)))(((mutability)))A -((prototype)) can be used at any time to add new properties and -methods to all objects based on it. For example, it might become -necessary for our rabbits to dance. - -[source,javascript] ----- -Rabbit.prototype.dance = function() { - console.log("The " + this.type + " rabbit dances a jig."); -}; -killerRabbit.dance(); -// → The killer rabbit dances a jig. ----- - -(((map)))(((object,as map)))That's convenient. But there are -situations where it causes problems. In previous chapters, we used an -object as a way to associate values with names by creating properties -for the names and giving them the corresponding value as their value. -Here's an example from link:04_data.html#object_map[Chapter 4]: - -// include_code - -[source,javascript] ----- -var map = {}; -function storePhi(event, phi) { - map[event] = phi; -} - -storePhi("pizza", 0.069); -storePhi("touched tree", -0.081); ----- - -(((for/in loop)))(((in operator)))We can iterate over all phi values -in the object using a `for`/`in` loop and test whether a name is in -there using the regular `in` operator. But unfortunately, the object's -prototype gets in the way. - -[source,javascript] ----- -Object.prototype.nonsense = "hi"; -for (var name in map) - console.log(name); -// → pizza -// → touched tree -// → nonsense -console.log("nonsense" in map); -// → true -console.log("toString" in map); -// → true - -// Delete the problematic property again -delete Object.prototype.nonsense; ----- - -(((prototype,pollution)))(((toString method)))That's all wrong. 
There -is no event called “nonsense” in our data set. And there _definitely_ -is no event called “toString”. - -(((enumerability)))(((for/in loop)))(((property)))Oddly, `toString` -did not show up in the `for`/`in` loop, but the `in` operator did -return true for it. This is because JavaScript distinguishes between -_enumerable_ and _nonenumerable_ properties. - -(((Object prototype)))All properties that we create by simply -assigning to them are enumerable. The standard properties in -`Object.prototype` are all nonenumerable, which is why they do not -show up in such a `for`/`in` loop. - -(((defineProperty function)))It is possible to define our own -nonenumerable properties by using the `Object.defineProperty` -function, which allows us to control the type of property we are -creating. - -[source,javascript] ----- -Object.defineProperty(Object.prototype, "hiddenNonsense", - {enumerable: false, value: "hi"}); -for (var name in map) - console.log(name); -// → pizza -// → touched tree -console.log(map.hiddenNonsense); -// → hi ----- - -(((in operator)))(((map)))(((object,as map)))(((hasOwnProperty -method)))So now the property is there, but it won't show up in a loop. -That's good. But we still have the problem with the regular `in` -operator claiming that the `Object.prototype` properties exist in our -object. For that, we can use the object's `hasOwnProperty` method. - -[source,javascript] ----- -console.log(map.hasOwnProperty("toString")); -// → false ----- - -(((property,own)))This method tells us whether the object _itself_ has -the property, without looking at its prototypes. This is often a more -useful piece of information than what the `in` operator gives us. 
- -(((prototype,pollution)))(((for/in loop)))When you are worried that -someone (some other code you loaded into your program) might have -messed with the base object prototype, I recommend you write your -`for`/`in` loops like this: - -[source,javascript] ----- -for (var name in map) { - if (map.hasOwnProperty(name)) { - // ... this is an own property - } -} ----- - -== Prototype-less objects == - -(((map)))(((object,as map)))(((hasOwnProperty method)))But the -rabbit hole doesn't end there. What if someone registered the name -`hasOwnProperty` in our `map` object and set it to the value 42? Now -the call to `map.hasOwnProperty` will try to call the local property, -which holds a number, not a function. - -(((Object.create function)))(((prototype,avoidance)))In such a case, -prototypes just get in the way, and we would actually prefer to have -objects without prototypes. We saw the `Object.create` function, which -allows us to create an object with a specific prototype. You are -allowed to pass `null` as the prototype to create a fresh object with -no prototype. For objects like `map`, where the properties could be -anything, this is exactly what we want. - -[source,javascript] ----- -var map = Object.create(null); -map["pizza"] = 0.069; -console.log("toString" in map); -// → false -console.log("pizza" in map); -// → true ----- - -(((in operator)))(((for/in loop)))(((Object prototype)))Much -better! We no longer need the `hasOwnProperty` kludge because all the -properties the object has are its own properties. Now we can safely -use `for`/`in` loops, no matter what people have been doing to -`Object.prototype`. - -== Polymorphism == - -(((toString method)))(((String -function)))(((polymorphism)))(((overriding)))When you call the -`String` function, which converts a value to a string, on an object, -it will call the `toString` method on that object to try to create a -meaningful string to return. 
I mentioned that some of the standard -prototypes define their own version of `toString` so they can -create a string that contains more useful information than -`"[object Object]"`. - -(((object-oriented programming)))This is a simple instance of a -powerful idea. When a piece of code is written to work with objects -that have a certain ((interface))—in this case, a `toString` -method—any kind of object that happens to support this interface can -be plugged into the code, and it will just work. - -This technique is called __polymorphism__—though no actual -shape-shifting is involved. Polymorphic code can work with values of -different shapes, as long as they support the interface it expects. - -[[tables]] -== Laying out a table == - -(((MOUNTAINS data set)))(((table example)))I am going to work through -a slightly more involved example in an attempt to give you a better -idea what ((polymorphism)), as well as ((object-oriented programming)) -in general, looks like. The project is this: we will write a program -that, given an array of arrays of ((table)) cells, builds up a string -that contains a nicely laid out table—meaning that the columns are -straight and the rows are aligned. Something like this: - -[source,text/plain] ----- -name height country ------------- ------ ------------- -Kilimanjaro 5895 Tanzania -Everest 8848 Nepal -Mount Fuji 3776 Japan -Mont Blanc 4808 Italy/France -Vaalserberg 323 Netherlands -Denali 6168 United States -Popocatepetl 5465 Mexico ----- - -The way our table-building system will work is that the builder -function will ask each cell how wide and high it wants to be and then -use this information to determine the width of the columns and the -height of the rows. The builder function will then ask the cells to -draw themselves at the correct size and assemble the results into a -single string. - -[[table_interface]] -(((table example)))The layout program will communicate with the cell -objects through a well-defined ((interface)). 
That way, the types of -cells that the program supports are not fixed in advance. We can add -new cell styles later—for example, underlined cells for table -headers—and if they support our interface, they will just work, -without requiring changes to the layout program. - -This is the interface: - -* `minHeight()` returns a number indicating the minimum height this - cell requires (in lines). - -* `minWidth()` returns a number indicating this cell's minimum width (in - characters). - -* `draw(width, height)` returns an array of length - `height`, which contains a series of strings that are each `width` characters wide. - This represents the content of the cell. - -(((function,higher-order)))I'm going to make heavy use of higher-order -array methods in this example since it lends itself well to that -approach. - -(((rowHeights function)))(((colWidths function)))(((maximum)))(((map -method)))(((reduce method)))The first part of the program computes -arrays of minimum column widths and row heights for a grid of cells. -The `rows` variable will hold an array of arrays, with each inner array -representing a row of cells. - -// include_code - -[source,javascript] ----- -function rowHeights(rows) { - return rows.map(function(row) { - return row.reduce(function(max, cell) { - return Math.max(max, cell.minHeight()); - }, 0); - }); -} - -function colWidths(rows) { - return rows[0].map(function(_, i) { - return rows.reduce(function(max, row) { - return Math.max(max, row[i].minWidth()); - }, 0); - }); -} ----- - -(((underscore character)))(((programming style)))Using a variable name -starting with an underscore (_) or consisting entirely of a single -underscore is a way to indicate (to human readers) that this argument -is not going to be used. - -The `rowHeights` function shouldn't be too hard to follow. It uses -`reduce` to compute the maximum height of an array of cells and wraps -that in `map` in order to do it for all rows in the `rows` array.
- -(((map method)))(((filter method)))(((forEach -method)))(((array,indexing)))(((reduce method)))Things are slightly -harder for the `colWidths` function because the outer array is an -array of rows, not of columns. I have failed to mention so far that -`map` (as well as `forEach`, `filter`, and similar array methods) -passes a second argument to the function it is given: the ((index)) of -the current element. By mapping over the elements of the first row and -only using the mapping function's second argument, `colWidths` builds -up an array with one element for every column index. The call to -`reduce` runs over the outer `rows` array for each index and picks -out the width of the widest cell at that index. - -(((table example)))(((drawTable function)))Here's the code to draw a -table: - -// include_code - -[source,javascript] ----- -function drawTable(rows) { - var heights = rowHeights(rows); - var widths = colWidths(rows); - - function drawLine(blocks, lineNo) { - return blocks.map(function(block) { - return block[lineNo]; - }).join(" "); - } - - function drawRow(row, rowNum) { - var blocks = row.map(function(cell, colNum) { - return cell.draw(widths[colNum], heights[rowNum]); - }); - return blocks[0].map(function(_, lineNo) { - return drawLine(blocks, lineNo); - }).join("\n"); - } - - return rows.map(drawRow).join("\n"); -} ----- - -(((inner function)))(((nesting,of functions)))The `drawTable` function -uses the internal helper function `drawRow` to draw all rows and then -joins them together with newline characters. - -(((table example)))The `drawRow` function itself first converts the -cell objects in the row to _blocks_, which are arrays of strings -representing the content of the cells, split by line. A single cell -containing simply the number 3776 might be represented by a -single-element array like `["3776"]`, whereas an underlined cell might -take up two lines and be represented by the array `["name", "----"]`. 
- -(((map method)))(((join method)))The blocks for a row, which all have -the same height, should appear next to each other in the final output. -The second call to `map` in `drawRow` builds up this output line by -line by mapping over the lines in the leftmost block and, for each of -those, collecting a line that spans the full width of the table. These -lines are then joined with newline characters to provide the whole row -as `drawRow`’s return value. - -The function `drawLine` extracts lines that should appear next -to each other from an array of blocks and joins them with a space -character to create a one-character gap between the table's columns. - -[[split]] -(((split method)))(((string,methods)))(((table example)))Now -let's write a constructor for cells that contain text, which -implements the ((interface)) for table cells. The constructor splits a -string into an array of lines using the string method `split`, which -cuts up a string at every occurrence of its argument and returns an -array of the pieces. The `minWidth` method finds the maximum line -width in this array. - -// include_code - -[source,javascript] ----- -function repeat(string, times) { - var result = ""; - for (var i = 0; i < times; i++) - result += string; - return result; -} - -function TextCell(text) { - this.text = text.split("\n"); -} -TextCell.prototype.minWidth = function() { - return this.text.reduce(function(width, line) { - return Math.max(width, line.length); - }, 0); -}; -TextCell.prototype.minHeight = function() { - return this.text.length; -}; -TextCell.prototype.draw = function(width, height) { - var result = []; - for (var i = 0; i < height; i++) { - var line = this.text[i] || ""; - result.push(line + repeat(" ", width - line.length)); - } - return result; -}; ----- - -(((TextCell type)))The code uses a helper function called `repeat`, -which builds a string whose value is the `string` argument repeated -`times` number of times. 
The `draw` method uses it to add “padding” to -lines so that they all have the required length. - -Let's try everything we've written so far by building up a 5 × 5 -checkerboard. - -[source,javascript] ----- -var rows = []; -for (var i = 0; i < 5; i++) { - var row = []; - for (var j = 0; j < 5; j++) { - if ((j + i) % 2 == 0) - row.push(new TextCell("##")); - else - row.push(new TextCell(" ")); - } - rows.push(row); -} -console.log(drawTable(rows)); -// → ## ## ## -// ## ## -// ## ## ## -// ## ## -// ## ## ## ----- - -It works! But since all cells have the same size, the table-layout -code doesn't really do anything interesting. - -[[mountains]] -(((data set)))(((MOUNTAINS data set)))The source data for the table of -mountains that we are trying to build is available in the `MOUNTAINS` -variable in the ((sandbox)) and also -http://eloquentjavascript.net/code/mountains.js[downloadable] from the -website(!book (http://eloquentjavascript.net/code#6[_eloquentjavascript.net/code#6_])!). - -(((table example)))We will want to highlight the top row, which -contains the column names, by underlining the cells with a series of -dash characters. No problem—we simply write a cell type that handles -underlining. - -// include_code - -[source,javascript] ----- -function UnderlinedCell(inner) { - this.inner = inner; -} -UnderlinedCell.prototype.minWidth = function() { - return this.inner.minWidth(); -}; -UnderlinedCell.prototype.minHeight = function() { - return this.inner.minHeight() + 1; -}; -UnderlinedCell.prototype.draw = function(width, height) { - return this.inner.draw(width, height - 1) - .concat([repeat("-", width)]); -}; ----- - -(((UnderlinedCell type)))An underlined cell _contains_ another cell. -It reports its minimum size as being the same as that of its inner -cell (by calling through to that cell's `minWidth` and `minHeight` -methods) but adds one to the height to account for the space taken -up by the underline.
- -(((concat method)))(((concatenation)))Drawing such a cell is quite -simple—we take the content of the inner cell and concatenate a single -line full of dashes to it. - -(((dataTable function)))Having an underlining mechanism, we can now -write a function that builds up a grid of cells from our data set. - -// test: wrap, trailing - -[source,javascript] ----- -function dataTable(data) { - var keys = Object.keys(data[0]); - var headers = keys.map(function(name) { - return new UnderlinedCell(new TextCell(name)); - }); - var body = data.map(function(row) { - return keys.map(function(name) { - return new TextCell(String(row[name])); - }); - }); - return [headers].concat(body); -} - -console.log(drawTable(dataTable(MOUNTAINS))); -// → name height country -// ------------ ------ ------------- -// Kilimanjaro 5895 Tanzania -// … etcetera ----- - -[[keys]] -(((Object.keys function)))(((property)))(((for/in loop)))The standard -`Object.keys` function returns an array of property names in an -object. The top row of the table must contain underlined cells that -give the names of the columns. Below that, the values of all the -objects in the data set appear as normal cells—we extract them by -mapping over the `keys` array so that we are sure that the order of -the cells is the same in every row. - -(((right-aligning)))The resulting table resembles the example shown -before, except that it does not right-align the numbers in the -`height` column. We will get to that in a moment. - -== Getters and setters == - -(((getter)))(((setter)))(((property)))When specifying an interface, it -is possible to include properties that are not methods. We could have -defined `minHeight` and `minWidth` to simply hold numbers. But that'd -have required us to compute them in the ((constructor)), which adds -code there that isn't strictly relevant to _constructing_ the object. 
-It would cause problems if, for example, the inner cell of an -underlined cell was changed, at which point the size of the underlined -cell should also change. - -(((programming style)))This has led some people to adopt a principle -of never including nonmethod properties in interfaces. Rather than -directly access a simple value property, they'd use `getSomething` and -`setSomething` methods to read and write the property. This approach -has the downside that you will end up writing—and reading—a lot of -additional methods. - -Fortunately, JavaScript provides a technique that gets us the best of -both worlds. We can specify properties that, from the outside, look -like normal properties but secretly have ((method))s associated with -them. - -[source,javascript] ----- -var pile = { - elements: ["eggshell", "orange peel", "worm"], - get height() { - return this.elements.length; - }, - set height(value) { - console.log("Ignoring attempt to set height to", value); - } -}; - -console.log(pile.height); -// → 3 -pile.height = 100; -// → Ignoring attempt to set height to 100 ----- - -(((defineProperty function)))((({} -(object))))(((getter)))(((setter)))In an object literal, the `get` or -`set` notation for properties allows you to specify a function to be -run when the property is read or written. You can also add such a -property to an existing object, for example a prototype, using the -`Object.defineProperty` function (which we previously used to create -nonenumerable properties). - -[source,javascript] ----- -Object.defineProperty(TextCell.prototype, "heightProp", { - get: function() { return this.text.length; } -}); - -var cell = new TextCell("no\nway"); -console.log(cell.heightProp); -// → 2 -cell.heightProp = 100; -console.log(cell.heightProp); -// → 2 ----- - -You can use a similar `set` property, in the object passed to -`defineProperty`, to specify a setter method. When a getter but no -setter is defined, writing to the property is simply ignored. 
- -== Inheritance == - -(((inheritance)))(((table example)))(((alignment)))(((TextCell -type)))We are not quite done yet with our table layout exercise. It -helps readability to right-align columns of numbers. We should create -another cell type that is like `TextCell`, but rather than padding the -lines on the right side, it pads them on the left side so that they -align to the right. - -(((RTextCell type)))We could simply write a whole new ((constructor)) -with all three methods in its prototype. But prototypes may themselves -have prototypes, and this allows us to do something clever. - -// include_code - -[source,javascript] ----- -function RTextCell(text) { - TextCell.call(this, text); -} -RTextCell.prototype = Object.create(TextCell.prototype); -RTextCell.prototype.draw = function(width, height) { - var result = []; - for (var i = 0; i < height; i++) { - var line = this.text[i] || ""; - result.push(repeat(" ", width - line.length) + line); - } - return result; -}; ----- - -(((shared property)))(((overriding)))(((interface)))We reuse the -constructor and the `minHeight` and `minWidth` methods from the -regular `TextCell`. An `RTextCell` is now basically equivalent to a -`TextCell`, except that its `draw` method contains a different -function. - -(((call method)))This pattern is called _((inheritance))_. It allows -us to build slightly different data types from existing data types with -relatively little work. Typically, the new constructor will call the -old ((constructor)) (using the `call` method in order to be able to -give it the new object as its `this` value). Once this constructor has -been called, we can assume that all the fields that the old object -type is supposed to contain have been added. We arrange for the -constructor's ((prototype)) to derive from the old prototype so that -instances of this type will also have access to the properties in that -prototype. Finally, we can override some of these properties by adding -them to our new prototype. 
- -(((dataTable function)))Now, if we slightly adjust the `dataTable` -function to use ++RTextCell++s for cells whose value is a number, we -get the table we were aiming for. - -// start_code bottom_lines: 1 -// include_code strip_log - -[source,javascript] ----- -function dataTable(data) { - var keys = Object.keys(data[0]); - var headers = keys.map(function(name) { - return new UnderlinedCell(new TextCell(name)); - }); - var body = data.map(function(row) { - return keys.map(function(name) { - var value = row[name]; - // This was changed: - if (typeof value == "number") - return new RTextCell(String(value)); - else - return new TextCell(String(value)); - }); - }); - return [headers].concat(body); -} - -console.log(drawTable(dataTable(MOUNTAINS))); -// → … beautifully aligned table ----- - -(((object-oriented programming)))Inheritance is a fundamental part of -the object-oriented tradition, alongside encapsulation and -polymorphism. But while the latter two are now generally regarded as -wonderful ideas, inheritance is somewhat controversial. - -(((complexity)))The main reason for this is that it is often confused -with ((polymorphism)), sold as a more powerful tool than it really -is, and subsequently overused in all kinds of ugly ways. Whereas -((encapsulation)) and polymorphism can be used to _separate_ pieces of -code from each other, reducing the tangledness of the overall program, -((inheritance)) fundamentally ties types together, creating _more_ -tangle. - -(((code structure)))(((programming style)))You can have -polymorphism without inheritance, as we saw. I am not going to tell -you to avoid inheritance entirely—I use it regularly in my own -programs. But you should see it as a slightly dodgy trick that can help you -define new types with little code, not as a grand principle of code -organization. 
A preferable way to extend types is through -((composition)), such as how `UnderlinedCell` builds on another cell -object by simply storing it in a property and forwarding method calls -to it in its own ((method))s. - -== The instanceof operator == - -(((type)))(((instanceof operator)))(((constructor)))(((object)))It is occasionally useful to know whether an object was derived -from a specific constructor. For this, JavaScript provides a binary -operator called `instanceof`. - -[source,javascript] ----- -console.log(new RTextCell("A") instanceof RTextCell); -// → true -console.log(new RTextCell("A") instanceof TextCell); -// → true -console.log(new TextCell("A") instanceof RTextCell); -// → false -console.log([1] instanceof Array); -// → true ----- - -(((inheritance)))The operator will see through inherited types. -An `RTextCell` is an instance of `TextCell` because -`RTextCell.prototype` derives from `TextCell.prototype`. The operator -can be applied to standard constructors like `Array`. Almost every -object is an instance of `Object`. - -== Summary == - -So objects are more complicated than I initially portrayed them. They -have prototypes, which are other objects, and will act as if they have -properties they don't have as long as the prototype has that property. -Simple objects have `Object.prototype` as their prototype. - -Constructors, which are functions whose names usually start with a -capital letter, can be used with the `new` operator to create new -objects. The new object's prototype will be the object found in the -`prototype` property of the constructor function. You can make good -use of this by putting the properties that all values of a given type -share into their prototype. The `instanceof` operator can, given an -object and a constructor, tell you whether that object is an instance -of that constructor. 
- -One useful thing to do with objects is to specify an interface for -them and tell everybody that they are supposed to talk to your -object only through that interface. The rest of the details that make up -your object are now _encapsulated_, hidden behind the interface. - -Once you are talking in terms of interfaces, who says that only one -kind of object may implement this interface? Having different objects -expose the same interface and then writing code that works on any -object with the interface is called _polymorphism_. It is very -useful. - -When implementing multiple types that differ in only some details, it -can be helpful to simply make the prototype of your new type derive -from the prototype of your old type and have your new constructor -call the old one. This gives you an object type similar to the -old type but for which you can add and override properties as you see -fit. - -== Exercises == - -[[exercise_vector]] -=== A vector type === - -(((dimensions)))(((Vector type)))(((coordinates)))(((vector (exercise))))Write a -((constructor)) `Vector` that represents a vector in two-dimensional -space. It takes `x` and `y` parameters (numbers), which it should save -to properties of the same name. - -(((addition)))(((subtraction)))Give the `Vector` prototype two -methods, `plus` and `minus`, that take another vector as a parameter -and return a new vector that has the sum or difference of the two -vectors’ (the one in `this` and the parameter) _x_ and _y_ values. - -Add a ((getter)) property `length` to the prototype that computes the -length of the vector—that is, the distance of the point (_x_, _y_) from -the origin (0, 0). - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. 
- -console.log(new Vector(1, 2).plus(new Vector(2, 3))); -// → Vector{x: 3, y: 5} -console.log(new Vector(1, 2).minus(new Vector(2, 3))); -// → Vector{x: -1, y: -1} -console.log(new Vector(3, 4).length); -// → 5 ----- -endif::interactive_target[] - -!!hint!! - -(((vector (exercise))))Your solution can follow the pattern of the -`Rabbit` constructor from this chapter quite closely. - -(((Pythagoras)))(((defineProperty function)))(((square -root)))(((Math.sqrt function)))Adding a getter property to the -constructor's prototype can be done with the `Object.defineProperty` function. To -compute the distance from (0, 0) to (x, y), you can use the -Pythagorean theorem, which says that the square of the distance we are -looking for is equal to the square of the x-coordinate plus the square -of the y-coordinate. Thus, (!html √(x^2^ + y^2^pass:[)]!)(!tex pass:[$\sqrt{x^2 + y^2}$]!) -is the number you want, and `Math.sqrt` is the way you compute a square -root in JavaScript. - -!!hint!! - -=== Another cell === - -(((StretchCell (exercise))))(((interface)))Implement a cell type named -`StretchCell(inner, width, height)` that conforms to the -link:06_object.html#table_interface[table cell interface] described -earlier in the chapter. It should wrap another cell (like -`UnderlinedCell` does) and ensure that the resulting cell has at -least the given `width` and `height`, even if the inner cell would -naturally be smaller. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -var sc = new StretchCell(new TextCell("abc"), 1, 2); -console.log(sc.minWidth()); -// → 3 -console.log(sc.minHeight()); -// → 2 -console.log(sc.draw(3, 2)); -// → ["abc", " "] ----- - -endif::interactive_target[] - -!!hint!! - -(((StretchCell (exercise))))You'll have to store all three constructor -arguments in the instance object. 
The `minWidth` and `minHeight` -methods should call through to the corresponding methods in the -`inner` cell but ensure that no number less than the given size is -returned (possibly using `Math.max`). - -Don't forget to add a `draw` method that simply forwards the call to -the inner cell. - -!!hint!! - -=== Sequence interface === - -(((sequence (exercise))))Design an _((interface))_ that abstracts -((iteration)) over a ((collection)) of values. An object that provides -this interface represents a sequence, and the interface must somehow -make it possible for code that uses such an object to iterate over the -sequence, looking at the element values it is made up of and having -some way to find out when the end of the sequence is reached. - -When you have specified your interface, try to write a function -`logFive` that takes a sequence object and calls `console.log` on its -first five elements—or fewer, if the sequence has fewer than five -elements. - -Then implement an object type `ArraySeq` that wraps an array and -allows iteration over the array using the interface you designed. -Implement another object type `RangeSeq` that iterates over a range of -integers (taking `from` and `to` arguments to its constructor) -instead. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -logFive(new ArraySeq([1, 2])); -// → 1 -// → 2 -logFive(new RangeSeq(100, 1000)); -// → 100 -// → 101 -// → 102 -// → 103 -// → 104 ----- - -endif::interactive_target[] - -!!hint!! - -(((sequence (exercise))))(((collection)))One way to solve this is to -give the sequence objects _((state))_, meaning their properties are -changed in the process of using them. You could store a counter that -indicates how far the sequence object has advanced. - -Your ((interface)) will need to expose at least a way to get the next -element and to find out whether the iteration has reached the end of -the sequence yet. 
It is tempting to roll these into one method, -`next`, which returns `null` or `undefined` when the sequence is at -its end. But now you have a problem when a sequence actually contains -`null`. So a separate method (or getter property) to find out whether -the end has been reached is probably preferable. - -(((mutation)))(((pure function)))(((efficiency)))Another solution is -to avoid changing state in the object. You can expose a method for -getting the current element (without advancing any counter) and -another for getting a new sequence that represents the remaining -elements after the current one (or a special value if the end of the -sequence is reached). This is quite elegant—a sequence value will -“stay itself” even after it is used and can thus be shared with other -code without worrying about what might happen to it. It is, -unfortunately, also somewhat inefficient in a language like -JavaScript because it involves creating a lot of objects during -iteration. - -!!hint!! diff --git a/07_elife.txt b/07_elife.txt deleted file mode 100644 index be35381f8..000000000 --- a/07_elife.txt +++ /dev/null @@ -1,1240 +0,0 @@ -:chap_num: 7 -:prev_link: 06_object -:next_link: 08_error -:load_files: ["code/chapter/07_elife.js", "code/animateworld.js"] -:zip: html - -= Project: Electronic Life = - -[chapterquote="true"] -[quote, Edsger Dijkstra, The Threats to Computing Science] -____ -[...] the question of whether Machines Can Think [...] is about as -relevant as the question of whether Submarines Can Swim. -____ - -(((artificial intelligence)))(((Dijkstra+++,+++ Edsger)))(((project -chapter)))(((reading code)))(((writing code)))In “project” chapters, -I'll stop pummeling you with new theory for a brief moment and -instead work through a program with you. Theory is indispensable when -learning to program, but it should be accompanied by reading and -understanding nontrivial programs. 
- -(((artificial life)))(((electronic life)))(((ecosystem)))Our -project in this chapter is to build a virtual ecosystem, a little -world populated with ((critter))s that move around and struggle for -survival. - -== Definition == - -(((dimensions)))(((electronic life)))To make this -task manageable, we will radically simplify the concept of a -_((world))_. Namely, a world will be a two-dimensional ((grid)) where -each entity takes up one full square of the grid. On every _((turn))_, -the critters all get a chance to take some action. - -(((discretization)))(((simulation)))Thus, we chop both time and space -into units with a fixed size: squares for space and turns for time. Of -course, this is a somewhat crude and inaccurate ((approximation)). But -our simulation is intended to be amusing, not accurate, so we can -freely cut such corners. - -[[plan]] -(((array)))We can define a world with a _plan_, an array of -strings that lays out the world's grid using one character per square. - -// include_code - -[source,javascript] ----- -var plan = ["############################", - "# # # o ##", - "# #", - "# ##### #", - "## # # ## #", - "### ## # #", - "# ### # #", - "# #### #", - "# ## o #", - "# o # o ### #", - "# # #", - "############################"]; ----- - -The “#” characters in this plan represent ((wall))s and rocks, and the -“o” characters represent critters. The spaces, as you might have -guessed, are empty space. - -(((object)))(((toString method)))(((turn)))A plan array can be -used to create a ((world)) object. Such an object keeps track of the -size and content of the world. It has a `toString` method, which -converts the world back to a printable string (similar to the plan it -was based on) so that we can see what's going on inside. The world -object also has a `turn` method, which allows all the critters in it to -take one turn and updates the world to reflect their actions. 
- -[[grid]] -== Representing space == - -(((array,as grid)))(((Vector type)))(((coordinates)))The ((grid)) -that models the world has a fixed width and height. Squares are -identified by their x- and y-coordinates. We use a simple type, -`Vector` (as seen in the exercises for the -link:06_object.html#exercise_vector[previous chapter]), to represent -these coordinate pairs. - -// include_code - -[source,javascript] ----- -function Vector(x, y) { - this.x = x; - this.y = y; -} -Vector.prototype.plus = function(other) { - return new Vector(this.x + other.x, this.y + other.y); -}; ----- - -(((object)))(((encapsulation)))Next, we need an object type that -models the grid itself. A grid is part of a world, but we are making -it a separate object (which will be a property of a ((world)) object) -to keep the world object itself simple. The world should concern -itself with world-related things, and the grid should concern itself with grid-related things. - -(((array)))(((data structure)))To store a grid of values, we have -several options. We can use an array of row arrays and use two -property accesses to get to a specific square, like this: - -[source,javascript] ----- -var grid = [["top left", "top middle", "top right"], - ["bottom left", "bottom middle", "bottom right"]]; -console.log(grid[1][2]); -// → bottom right ----- - -(((array,indexing)))(((coordinates)))(((grid)))Or we can use a -single array, with size width × height, and decide that the element at -(_x_,_y_) is found at position _x_ + (_y_ × width) in the array. - -[source,javascript] ----- -var grid = ["top left", "top middle", "top right", - "bottom left", "bottom middle", "bottom right"]; -console.log(grid[2 + (1 * 3)]); -// → bottom right ----- - -(((encapsulation)))(((abstraction)))(((Array constructor)))(((array,creation)))(((array,length -of)))Since the actual access to this array will be wrapped in methods -on the grid object type, it doesn't matter to outside code which -approach we take. 
I chose the second representation because it makes -it much easier to create the array. When calling the `Array` -constructor with a single number as an argument, it creates a new empty -array of the given length. - -(((Grid type)))This code defines the `Grid` object, with some basic -methods: - -// include_code - -[source,javascript] ----- -function Grid(width, height) { - this.space = new Array(width * height); - this.width = width; - this.height = height; -} -Grid.prototype.isInside = function(vector) { - return vector.x >= 0 && vector.x < this.width && - vector.y >= 0 && vector.y < this.height; -}; -Grid.prototype.get = function(vector) { - return this.space[vector.x + this.width * vector.y]; -}; -Grid.prototype.set = function(vector, value) { - this.space[vector.x + this.width * vector.y] = value; -}; ----- - -And here is a trivial test: - -[source,javascript] ----- -var grid = new Grid(5, 5); -console.log(grid.get(new Vector(1, 1))); -// → undefined -grid.set(new Vector(1, 1), "X"); -console.log(grid.get(new Vector(1, 1))); -// → X ----- - -== A critter's programming interface == - -(((record)))(((electronic life)))(((interface)))Before we can -start on the `World` ((constructor)), we must get more specific about -the ((critter)) objects that will be living inside it. I mentioned -that the world will ask the critters what actions they want to take. -This works as follows: each critter object has an `act` ((method)) -that, when called, returns an _action_. An action is an object with a -`type` property, which names the type of action the critter wants to -take, for example `"move"`. The action may also contain extra -information, such as the direction the critter wants to move in. - -[[directions]] -(((Vector type)))(((View type)))(((directions object)))(((object,as map)))Critters are terribly myopic and can see only the -squares directly around them on the grid. But even this limited vision -can be useful when deciding which action to take. 
When the `act` -method is called, it is given a _view_ object that allows the critter -to inspect its surroundings. We name the eight surrounding squares by -their ((compass direction))s: `"n"` for north, `"ne"` for northeast, -and so on. Here's the object we will use to map from direction names -to coordinate offsets: - -// include_code - -[source,javascript] ----- -var directions = { - "n": new Vector( 0, -1), - "ne": new Vector( 1, -1), - "e": new Vector( 1, 0), - "se": new Vector( 1, 1), - "s": new Vector( 0, 1), - "sw": new Vector(-1, 1), - "w": new Vector(-1, 0), - "nw": new Vector(-1, -1) -}; ----- - -(((View type)))The view object has a method `look`, which takes a -direction and returns a character, for example `"#"` when there is a -wall in that direction, or `" "` (space) when there is nothing there. -The object also provides the convenient methods `find` and `findAll`. -Both take a map character as an argument. The first returns a direction -in which the character can be found next to the critter or returns `null` if -no such direction exists. The second returns an array containing all -directions with that character. For example, a creature sitting left -(west) of a wall will get `["ne", "e", "se"]` when calling `findAll` -on its view object with the `"#"` character as argument. 
- -(((bouncing)))(((behavior)))(((BouncingCritter type)))Here is a -simple, stupid critter that just follows its nose until it hits an -obstacle and then bounces off in a random open direction: - -// include_code - -[source,javascript] ----- -function randomElement(array) { - return array[Math.floor(Math.random() * array.length)]; -} - -var directionNames = "n ne e se s sw w nw".split(" "); - -function BouncingCritter() { - this.direction = randomElement(directionNames); -}; - -BouncingCritter.prototype.act = function(view) { - if (view.look(this.direction) != " ") - this.direction = view.find(" ") || "s"; - return {type: "move", direction: this.direction}; -}; ----- - -(((random number)))(((Math.random function)))(((randomElement -function)))(((array,indexing)))The `randomElement` helper -function simply picks a random element from an array, using -`Math.random` plus some arithmetic to get a random index. We'll use -this again later because randomness can be useful in ((simulation))s. - -(((Object.keys function)))To pick a random direction, the -`BouncingCritter` constructor calls `randomElement` on an array of -direction names. We could also have used `Object.keys` to get this -array from the `directions` object we defined -link:07_elife.html#directions[earlier], but that provides no -guarantees about the order in which the properties are listed. In most -situations, modern JavaScript engines will return properties in the -order they were defined, but they are not required to. - -(((|| operator)))(((null)))The “++|| "s"++” in the `act` method is -there to prevent `this.direction` from getting the value `null` if the -critter is somehow trapped with no empty space around it (for example -when crowded into a corner by other critters). - -== The world object == - -(((World type)))(((electronic life)))Now we can start on the -`World` object type. 
The ((constructor)) takes a plan (the array of -strings representing the world's grid, described -link:07_elife.html#grid[earlier]) and a _((legend))_ as arguments. A -legend is an object that tells us what each character in the map -means. It contains a constructor for every character—except for the -space character, which always refers to `null`, the value we'll use to -represent empty space. - -// include_code - -[source,javascript] ----- -function elementFromChar(legend, ch) { - if (ch == " ") - return null; - var element = new legend[ch](); - element.originChar = ch; - return element; -} - -function World(map, legend) { - var grid = new Grid(map[0].length, map.length); - this.grid = grid; - this.legend = legend; - - map.forEach(function(line, y) { - for (var x = 0; x < line.length; x++) - grid.set(new Vector(x, y), - elementFromChar(legend, line[x])); - }); -} ----- - -(((elementFromChar function)))(((object,as map)))In `elementFromChar`, -first we create an instance of the right type by looking up the -character's constructor and applying `new` to it. Then we add an -`originChar` ((property)) to it to make it easy to find out what -character the element was originally created from. - -(((toString method)))(((nesting,of loops)))(((for -loop)))(((coordinates)))We need this `originChar` property when -implementing the world's `toString` method. This method builds up a -maplike string from the world's current state by performing a -two-dimensional loop over the squares on the grid. 
- -// include_code - -[source,javascript] ----- -function charFromElement(element) { - if (element == null) - return " "; - else - return element.originChar; -} - -World.prototype.toString = function() { - var output = ""; - for (var y = 0; y < this.grid.height; y++) { - for (var x = 0; x < this.grid.width; x++) { - var element = this.grid.get(new Vector(x, y)); - output += charFromElement(element); - } - output += "\n"; - } - return output; -}; ----- - -(((electronic life)))(((constructor)))(((Wall type)))A ((wall)) is -a simple object—it is used only for taking up space and has no -`act` method. - -// include_code - -[source,javascript] ----- -function Wall() {} ----- - -(((World type)))When we try the `World` object by creating an -instance based on the plan from link:07_elife.html#plan[earlier in the -chapter] and then calling `toString` on it, we get a string very -similar to the plan we put in. - -// include_code strip_log -// test: trim - -[source,javascript] ----- -var world = new World(plan, {"#": Wall, - "o": BouncingCritter}); -console.log(world.toString()); -// → ############################ -// # # # o ## -// # # -// # ##### # -// ## # # ## # -// ### ## # # -// # ### # # -// # #### # -// # ## o # -// # o # o ### # -// # # # -// ############################ ----- - -== this and its scope == - -(((forEach -method)))(((function,scope)))(((this)))(((scope)))(((self -variable)))(((global object)))The `World` ((constructor)) contains a -call to `forEach`. One interesting thing to note is that inside the -function passed to `forEach`, we are no longer directly in the -function scope of the constructor. Each function call gets its own -`this` binding, so the `this` in the inner function does _not_ -refer to the newly constructed object that the outer `this` refers to. -In fact, when a function isn't called as a method, `this` will refer -to the global object. - -This means that we can't write `this.grid` to access the grid from -inside the ((loop)). 
Instead, the outer function creates a normal -local variable, `grid`, through which the inner function gets access -to the grid. - -(((future)))(((ECMAScript 6)))(((arrow function)))(((self -variable)))This is a bit of a design blunder in JavaScript. -Fortunately, the next version of the language provides a solution for -this problem. Meanwhile, there are workarounds. A common pattern is to -say `var self = this` and from then on refer to `self`, which is a -normal variable and thus visible to inner functions. - -(((bind method)))(((this)))Another solution is to use the `bind` -method, which allows us to provide an explicit `this` object to bind -to. - -[source,javascript] ----- -var test = { - prop: 10, - addPropTo: function(array) { - return array.map(function(elt) { - return this.prop + elt; - }.bind(this)); - } -}; -console.log(test.addPropTo([5])); -// → [15] ----- - -(((map method)))The function passed to `map` is the result of the -`bind` call and thus has its `this` bound to the first argument given -to ++bind++—the outer function's `this` value (which holds the `test` -object). - -(((context parameter)))(((function,higher-order)))Most ((standard)) -higher-order methods on arrays, such as `forEach` and `map`, take an -optional second argument that can also be used to provide a `this` for -the calls to the iteration function. So you could express the previous example -in a slightly simpler way. - -[source,javascript] ----- -var test = { - prop: 10, - addPropTo: function(array) { - return array.map(function(elt) { - return this.prop + elt; - }, this); // ← no bind - } -}; -console.log(test.addPropTo([5])); -// → [15] ----- - -This works only for higher-order functions that -support such a _context_ parameter. When they don't, you'll need to -use one of the other approaches. 
- -(((context parameter)))(((function,higher-order)))(((call method)))In -our own higher-order functions, we can support such a context -parameter by using the `call` method to call the function given as an -argument. For example, here is a `forEach` method for our `Grid` type, -which calls a given function for each element in the grid that isn't -null or undefined: - -// include_code - -[source,javascript] ----- -Grid.prototype.forEach = function(f, context) { - for (var y = 0; y < this.height; y++) { - for (var x = 0; x < this.width; x++) { - var value = this.space[x + y * this.width]; - if (value != null) - f.call(context, value, new Vector(x, y)); - } - } -}; ----- - -== Animating life == - -(((simulation)))(((electronic life)))(((World type)))The next -step is to write a `turn` method for the world object that gives the -((critter))s a chance to act. It will go over the grid using the -`forEach` method we just defined, looking for objects with an `act` -method. When it finds one, `turn` calls that method to get an action -object and carries out the action when it is valid. For now, only -`"move"` actions are understood. - -(((grid)))There is one potential problem with this approach. Can you -spot it? If we let critters move as we come across them, they may move -to a square that we haven't looked at yet, and we'll allow them to -move _again_ when we reach that square. Thus, we have to keep an array -of critters that have already had their turn and ignore them when we -see them again. - -// include_code - -[source,javascript] ----- -World.prototype.turn = function() { - var acted = []; - this.grid.forEach(function(critter, vector) { - if (critter.act && acted.indexOf(critter) == -1) { - acted.push(critter); - this.letAct(critter, vector); - } - }, this); -}; ----- - -(((this)))We use the second parameter to the grid's `forEach` method -to be able to access the correct `this` inside the inner function. 
-The `letAct` method contains the actual logic that allows the critters -to move. - -// include_code - -[[checkDestination]] -[source,javascript] ----- -World.prototype.letAct = function(critter, vector) { - var action = critter.act(new View(this, vector)); - if (action && action.type == "move") { - var dest = this.checkDestination(action, vector); - if (dest && this.grid.get(dest) == null) { - this.grid.set(vector, null); - this.grid.set(dest, critter); - } - } -}; - -World.prototype.checkDestination = function(action, vector) { - if (directions.hasOwnProperty(action.direction)) { - var dest = vector.plus(directions[action.direction]); - if (this.grid.isInside(dest)) - return dest; - } -}; ----- - -(((View type)))(((electronic life)))First, we simply ask the -critter to act, passing it a view object that knows about the world -and the critter's current position in that world (we'll define `View` -in a link:07_elife.html#view[moment]). The `act` method returns an -action of some kind. - -If the action's `type` is not `"move"`, it is ignored. If it _is_ -`"move"`, if it has a `direction` property that refers to a valid -direction, _and_ if the square in that direction is empty (null), we set -the square where the critter used to be to hold null and store the -critter in the destination square. - -(((error tolerance)))(((defensive programming)))(((sloppy -programming)))(((validation)))Note that `letAct` takes care to ignore -nonsense ((input))—it doesn't assume that the action's `direction` -property is valid or that the `type` property makes sense. This kind -of _defensive_ programming makes sense in some situations. The main -reason for doing it is to validate inputs coming from sources you -don't control (such as user or file input), but it can also be useful -to isolate subsystems from each other. In this case, the intention is -that the critters themselves can be programmed sloppily—they don't -have to verify if their intended actions make sense. 
They can just -request an action, and the world will figure out whether to allow it. - -(((interface)))(((private property)))(((access -control)))(((property,naming)))(((underscore character)))(((World -type)))These two methods are not part of the external interface of a -`World` object. They are an internal detail. Some languages provide -ways to explicitly declare certain methods and properties _private_ -and signal an error when you try to use them from outside the object. -JavaScript does not, so you will have to rely on some other form of -communication to describe what is part of an object's interface. -Sometimes it can help to use a naming scheme to distinguish between -external and internal properties, for example by prefixing all -internal ones with an underscore character (_). This will make -accidental uses of properties that are not part of an object's -interface easier to spot. - -[[view]] -(((View type)))The one missing part, the `View` type, looks like this: - -// include_code - -[source,javascript] ----- -function View(world, vector) { - this.world = world; - this.vector = vector; -} -View.prototype.look = function(dir) { - var target = this.vector.plus(directions[dir]); - if (this.world.grid.isInside(target)) - return charFromElement(this.world.grid.get(target)); - else - return "#"; -}; -View.prototype.findAll = function(ch) { - var found = []; - for (var dir in directions) - if (this.look(dir) == ch) - found.push(dir); - return found; -}; -View.prototype.find = function(ch) { - var found = this.findAll(ch); - if (found.length == 0) return null; - return randomElement(found); -}; ----- - -(((defensive programming)))The `look` method figures out the -coordinates that we are trying to look at and, if they are inside the -((grid)), finds the character corresponding to the element that sits -there. 
For coordinates outside the grid, `look` simply pretends that -there is a wall there so that if you define a world that isn't walled -in, the critters still won't be tempted to try to walk off the edges. - -== It moves == - -(((electronic life)))(((simulation)))We instantiated a world -object earlier. Now that we've added all the necessary methods, it -should be possible to actually make the world move. - -[source,javascript] ----- -for (var i = 0; i < 5; i++) { - world.turn(); - console.log(world.toString()); -} -// → … five turns of moving critters ----- - -ifdef::book_target[] - -The first two maps that are displayed will look something like this -(depending on the random direction the critters picked): - ----- -############################ ############################ -# # # ## # # # ## -# o # # # -# ##### # # ##### o # -## # # ## # ## # # ## # -### ## # # ### ## # # -# ### # # # ### # # -# #### # # #### # -# ## # # ## # -# # o ### # #o # ### # -#o # o # # # o o # -############################ ############################ ----- - -(((animation)))They move! To get a more interactive view of these -critters crawling around and bouncing off the walls, open this chapter -in the online version of the book at -http://eloquentjavascript.net[_eloquentjavascript.net_]. - -endif::book_target[] - -ifdef::interactive_target[] - -Simply printing out many copies of the map is a rather unpleasant -way to observe a world, though. That's why the sandbox provides an -`animateWorld` function that will run a world as an onscreen -animation, moving three turns per second, until you hit the stop -button. - -// test: no - -[source,javascript] ----- -animateWorld(world); -// → … life! ----- - -The implementation of `animateWorld` will remain a mystery for now, -but after you've read the link:13_dom.html#dom[later chapters] of this -book, which discuss JavaScript integration in web browsers, it won't -look so magical anymore. 
- -endif::interactive_target[] - -== More life forms == - -The dramatic highlight of our world, if you watch for a bit, is when -two critters bounce off each other. Can you think of another -interesting form of ((behavior))? - -(((wall following)))The one I came up with is a ((critter)) that moves -along walls. Conceptually, the critter keeps its left hand (paw, -tentacle, whatever) to the wall and follows along. This turns out to -be not entirely trivial to implement. - -(((WallFollower type)))(((directions object)))We need to be -able to “compute” with ((compass direction))s. Since directions are -modeled by a set of strings, we need to define our own operation -(`dirPlus`) to calculate relative directions. So `dirPlus("n", 1)` -means one 45-degree turn clockwise from north, giving `"ne"`. -Similarly, `dirPlus("s", -2)` means 90 degrees counterclockwise from -south, which is east. - -// include_code - -[source,javascript] ----- -function dirPlus(dir, n) { - var index = directionNames.indexOf(dir); - return directionNames[(index + n + 8) % 8]; -} - -function WallFollower() { - this.dir = "s"; -} - -WallFollower.prototype.act = function(view) { - var start = this.dir; - if (view.look(dirPlus(this.dir, -3)) != " ") - start = this.dir = dirPlus(this.dir, -2); - while (view.look(this.dir) != " ") { - this.dir = dirPlus(this.dir, 1); - if (this.dir == start) break; - } - return {type: "move", direction: this.dir}; -}; ----- - -(((artificial intelligence)))(((pathfinding)))(((View type)))The `act` -method only has to “scan” the critter's surroundings, starting from -its left side and going clockwise until it finds an empty square. -It then moves in the direction of that empty square. - -What complicates things is that a critter may end up in the middle of -empty space, either as its start position or as a result of walking -around another critter. 
If we apply the approach I just described in -empty space, the poor critter will just keep on turning left at every -step, running in circles. - -So there is an extra check (the `if` statement) to start scanning to -the left only if it looks like the critter has just passed some kind -of ((obstacle))—that is, if the space behind and to the left of the -critter is not empty. Otherwise, the critter starts scanning directly -ahead, so that it'll walk straight when in empty space. - -(((infinite loop)))And finally, there's a test comparing `this.dir` to -`start` after every pass through the loop to make sure that the loop -won't run forever when the critter is walled in or crowded in by other -critters and can't find an empty square. - -ifdef::interactive_target[] - -This small world demonstrates the wall-following creatures: - -// test: no - -[source,javascript] ----- -animateWorld(new World( - ["############", - "# # #", - "# ~ ~ #", - "# ## #", - "# ## o####", - "# #", - "############"], - {"#": Wall, - "~": WallFollower, - "o": BouncingCritter} -)); ----- - -endif::interactive_target[] - -== A more lifelike simulation == - -(((simulation)))(((electronic life)))To make life in our world -more interesting, we will add the concepts of ((food)) and -((reproduction)). Each living thing in the world gets a new property, -`energy`, which is reduced by performing actions and increased by -eating things. When the critter has enough ((energy)), it can -reproduce, generating a new critter of the same kind. To keep things -simple, the critters in our world reproduce asexually, all by -themselves. - -(((energy)))(((entropy)))If critters only move around and eat one -another, the world will soon succumb to the law of increasing entropy, -run out of energy, and become a lifeless wasteland. To prevent this -from happening (too quickly, at least), we add ((plant))s to the -world. Plants do not move. 
They just use ((photosynthesis)) to grow -(that is, increase their energy) and reproduce. - -(((World type)))To make this work, we'll need a world with a different -`letAct` method. We could just replace the method of the `World` -prototype, but I've become very attached to our simulation with the -wall-following critters and would hate to break that old world. - -(((actionTypes object)))(((LifelikeWorld type)))One solution is to use -((inheritance)). We create a new ((constructor)), `LifelikeWorld`, -whose prototype is based on the `World` prototype but which overrides -the `letAct` method. The new `letAct` method delegates the work of -actually performing an action to various functions stored in the -`actionTypes` object. - -// include_code - -[source,javascript] ----- -function LifelikeWorld(map, legend) { - World.call(this, map, legend); -} -LifelikeWorld.prototype = Object.create(World.prototype); - -var actionTypes = Object.create(null); - -LifelikeWorld.prototype.letAct = function(critter, vector) { - var action = critter.act(new View(this, vector)); - var handled = action && - action.type in actionTypes && - actionTypes[action.type].call(this, critter, - vector, action); - if (!handled) { - critter.energy -= 0.2; - if (critter.energy <= 0) - this.grid.set(vector, null); - } -}; ----- - -(((electronic life)))(((function,as value)))(((call -method)))(((this)))The new `letAct` method first checks whether an -action was returned at all, then whether a handler function for this -type of action exists, and finally whether that handler returned -true, indicating that it successfully handled the action. Note the use -of `call` to give the handler access to the world, through its `this` -binding. - -If the action didn't work for whatever reason, the default action is -for the creature to simply wait. It loses one-fifth point of ((energy)), -and if its energy level drops to zero or below, the creature dies and -is removed from the grid.
- -== Action handlers == - -(((photosynthesis)))The simplest action a creature can perform is -`"grow"`, used by ((plant))s. When an action object like `{type: -"grow"}` is returned, the following handler method will be called: - -// include_code - -[source,javascript] ----- -actionTypes.grow = function(critter) { - critter.energy += 0.5; - return true; -}; ----- - -Growing always succeeds and adds half a point to the plant's -((energy)) level. - -Moving is more involved. - -// include_code - -[source,javascript] ----- -actionTypes.move = function(critter, vector, action) { - var dest = this.checkDestination(action, vector); - if (dest == null || - critter.energy <= 1 || - this.grid.get(dest) != null) - return false; - critter.energy -= 1; - this.grid.set(vector, null); - this.grid.set(dest, critter); - return true; -}; ----- - -(((validation)))This action first checks, using the `checkDestination` -method defined link:07_elife.html#checkDestination[earlier], whether -the action provides a valid destination. If not, or if the -destination isn't empty, or if the critter lacks the required -((energy)), `move` returns false to indicate no action was taken. -Otherwise, it moves the critter and subtracts the energy cost. - -(((food)))In addition to moving, critters can eat. - -// include_code - -[source,javascript] ----- -actionTypes.eat = function(critter, vector, action) { - var dest = this.checkDestination(action, vector); - var atDest = dest != null && this.grid.get(dest); - if (!atDest || atDest.energy == null) - return false; - critter.energy += atDest.energy; - this.grid.set(dest, null); - return true; -}; ----- - -(((validation)))Eating another ((critter)) also involves providing a -valid destination square. This time, the destination must not be -empty and must contain something with ((energy)), like a critter (but -not a wall—walls are not edible). If so, the energy from the eaten is -transferred to the eater, and the victim is removed from the grid. 
- -(((reproduction)))And finally, we allow our critters to reproduce. - -// include_code - -[source,javascript] ----- -actionTypes.reproduce = function(critter, vector, action) { - var baby = elementFromChar(this.legend, - critter.originChar); - var dest = this.checkDestination(action, vector); - if (dest == null || - critter.energy <= 2 * baby.energy || - this.grid.get(dest) != null) - return false; - critter.energy -= 2 * baby.energy; - this.grid.set(dest, baby); - return true; -}; ----- - -(((electronic life)))Reproducing costs twice the ((energy)) -level of the newborn critter. So we first create a (hypothetical) baby -using `elementFromChar` on the critter's own origin character. Once we -have a baby, we can find its energy level and test whether the parent -has enough energy to successfully bring it into the world. We also -require a valid (and empty) destination. - -(((reproduction)))If everything is okay, the baby is put onto the grid -(it is now no longer hypothetical), and the energy is spent. - -== Populating the new world == - -(((Plant type)))(((electronic life)))We now have a -((framework)) to simulate these more lifelike creatures. We could put -the critters from the old world into it, but they would just die -since they don't have an ((energy)) property. So let's make new ones. -First we'll write a ((plant)), which is a rather simple life-form. - -// include_code - -[source,javascript] ----- -function Plant() { - this.energy = 3 + Math.random() * 4; -} -Plant.prototype.act = function(view) { - if (this.energy > 15) { - var space = view.find(" "); - if (space) - return {type: "reproduce", direction: space}; - } - if (this.energy < 20) - return {type: "grow"}; -}; ----- - -(((reproduction)))(((photosynthesis)))(((random -number)))(((Math.random function)))Plants start with an energy level -between 3 and 7, randomized so that they don't all reproduce in the -same turn. 
When a plant reaches 15 energy points and there is empty -space nearby, it reproduces into that empty space. If a plant can't -reproduce, it simply grows until it reaches energy level 20. - -(((critter)))(((PlantEater type)))(((herbivore)))(((food chain)))We -now define a plant eater. - -// include_code - -[source,javascript] ----- -function PlantEater() { - this.energy = 20; -} -PlantEater.prototype.act = function(view) { - var space = view.find(" "); - if (this.energy > 60 && space) - return {type: "reproduce", direction: space}; - var plant = view.find("*"); - if (plant) - return {type: "eat", direction: plant}; - if (space) - return {type: "move", direction: space}; -}; ----- - -We'll use the `*` character for ((plant))s, so that's what this -creature will look for when it searches for ((food)). - -== Bringing it to life == - -(((electronic life)))And that gives us enough elements to try -our new world. Imagine the following map as a grassy valley with a herd of -((herbivore))s in it, some boulders, and lush ((plant)) life -everywhere. - -// include_code - -[source,javascript] ----- -var valley = new LifelikeWorld( - ["############################", - "##### ######", - "## *** **##", - "# *##** ** O *##", - "# *** O ##** *#", - "# O ##*** #", - "# ##** #", - "# O #* #", - "#* #** O #", - "#*** ##** O **#", - "##**** ###*** *###", - "############################"], - {"#": Wall, - "O": PlantEater, - "*": Plant} -); ----- - -(((animation)))(((simulation)))Let's see what happens if we run this. -(!book These snapshots illustrate a typical run of this world.!) 
- -ifdef::interactive_target[] - -// start_code -// test: no - -[source,javascript] ----- -animateWorld(valley); ----- - -endif::interactive_target[] - -ifdef::book_target[] - ----- -############################ ############################ -##### ###### ##### ** ###### -## *** O *## ## ** * O ## -# *##* ** *## # **## ## -# ** ##* *# # ** O ##O # -# ##* # # *O * * ## # -# ## O # # *** ## O # -# #* O # #** #*** # -#* #** O # #** O #**** # -#* O O ##* **# #*** ##*** O # -##* ###* ### ##** ###** O ### -############################ ############################ - -############################ ############################ -#####O O ###### ##### O ###### -## ## ## ## -# ##O ## # ## O ## -# O O *## # # ## # -# O O O **## O # # ## # -# **## O # # O ## * # -# # *** * # # # O # -# # O***** O # # O # O # -# ##****** # # ## O O # -## ###****** ### ## ### O ### -############################ ############################ - -############################ ############################ -##### ###### ##### ###### -## ## ## ** * ## -# ## ## # ## ***** ## -# ## # # ##**** # -# ##* * # # ##***** # -# O ## * # # ##****** # -# # # # # ** ** # -# # # # # # -# ## # # ## # -## ### ### ## ### ### -############################ ############################ ----- - -endif::book_target[] - -(((stability)))(((reproduction)))(((extinction)))(((starvation)))Most -of the time, the plants multiply and expand quite quickly, but then -the abundance of ((food)) causes a population explosion of the -((herbivore))s, who proceed to wipe out all or nearly all of the -((plant))s, resulting in a mass starvation of the critters. Sometimes, -the ((ecosystem)) recovers and another cycle starts. At other times, -one of the species dies out completely. If it's the herbivores, the -whole space will fill with plants. If it's the plants, the remaining -critters starve, and the valley becomes a desolate wasteland. Ah, the -cruelty of nature. 
- -== Exercises == - -=== Artificial stupidity === - -(((artificial stupidity (exercise))))(((artificial -intelligence)))(((extinction)))Having the inhabitants of our world go -extinct after a few minutes is kind of depressing. To deal with this, -we could try to create a smarter plant eater. - -(((pathfinding)))(((reproduction)))(((food)))There are several obvious -problems with our herbivores. First, they are terribly greedy, -stuffing themselves with every plant they see until they have wiped -out the local plant life. Second, their randomized movement (recall -that the `view.find` method returns a random direction when multiple -directions match) causes them to stumble around ineffectively and -starve if there don't happen to be any plants nearby. And finally, -they breed very fast, which makes the cycles between abundance and -famine quite intense. - -Write a new critter type that tries to address one or more of these -points and substitute it for the old `PlantEater` type in the valley -world. See how it fares. Tweak it some more if necessary. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here -function SmartPlantEater() {} - -animateWorld(new LifelikeWorld( - ["############################", - "##### ######", - "## *** **##", - "# *##** ** O *##", - "# *** O ##** *#", - "# O ##*** #", - "# ##** #", - "# O #* #", - "#* #** O #", - "#*** ##** O **#", - "##**** ###*** *###", - "############################"], - {"#": Wall, - "O": SmartPlantEater, - "*": Plant} -)); ----- - -endif::interactive_target[] - -!!hint!! - -(((artificial stupidity (exercise))))(((artificial -intelligence)))(((behavior)))(((state)))The greediness problem can be -attacked in several ways. The critters could stop eating when they -reach a certain ((energy)) level. Or they could eat only every N turns (by -keeping a counter of the turns since their last meal in a property on -the creature object). 
Or, to make sure plants never go entirely -extinct, the animals could refuse to eat a ((plant)) unless they see -at least one other plant nearby (using the `findAll` method on the -view). A combination of these, or some entirely different strategy, -might also work. - -(((pathfinding)))(((wall following)))Making the critters move more -effectively could be done by stealing one of the movement strategies -from the critters in our old, energyless world. Both the bouncing -behavior and the wall-following behavior showed a much wider range of -movement than completely random staggering. - -(((reproduction)))(((stability)))Making creatures breed more slowly is -trivial. Just increase the minimum energy level at which they -reproduce. Of course, making the ecosystem more stable also makes it -more boring. If you have a handful of fat, immobile critters forever -munching on a sea of plants and never reproducing, that makes for a -very stable ecosystem. But no one wants to watch that. - -!!hint!! - -=== Predators === - -(((predators (exercise))))(((carnivore)))(((food chain)))Any serious -((ecosystem)) has a food chain longer than a single link. Write -another ((critter)) that survives by eating the ((herbivore)) critter. -You'll notice that ((stability)) is even harder to achieve now that there -are cycles at multiple levels. Try to find a strategy to make the -ecosystem run smoothly for at least a little while. - -(((Tiger type)))One thing that will help is to make the world bigger. -This way, local population booms or busts are less likely to wipe out -a species entirely, and there is space for the relatively large prey -population needed to sustain a small predator population. 
- -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here -function Tiger() {} - -animateWorld(new LifelikeWorld( - ["####################################################", - "# #### **** ###", - "# * @ ## ######## OO ##", - "# * ## O O **** *#", - "# ##* ########## *#", - "# ##*** * **** **#", - "#* ** # * *** ######### **#", - "#* ** # * # * **#", - "# ## # O # *** ######", - "#* @ # # * O # #", - "#* # ###### ** #", - "### **** *** ** #", - "# O @ O #", - "# * ## ## ## ## ### * #", - "# ** # * ##### O #", - "## ** O O # # *** *** ### ** #", - "### # ***** ****#", - "####################################################"], - {"#": Wall, - "@": Tiger, - "O": SmartPlantEater, // from previous exercise - "*": Plant} -)); ----- - -endif::interactive_target[] - -!!hint!! - -(((predators (exercise))))(((reproduction)))(((starvation)))Many of -the same tricks that worked for the previous exercise also apply here. -Making the predators big (lots of energy) and having them reproduce -slowly is recommended. That'll make them less vulnerable to periods of -starvation when the herbivores are scarce. - -Beyond staying alive, keeping its ((food)) stock alive is a -predator's main objective. Find some way to make predators hunt -more aggressively when there are a lot of ((herbivore))s and hunt more -slowly (or not at all) when prey is rare. Since plant eaters move -around, the simple trick of eating one only when others are nearby is -unlikely to work—that'll happen so rarely that your predator will -starve. But you could keep track of observations in previous turns, in -some ((data structure)) kept on the predator objects, and have it base -its ((behavior)) on what it has seen recently. - -!!hint!! 
diff --git a/08_error.txt b/08_error.txt deleted file mode 100644 index 1d000ab2e..000000000 --- a/08_error.txt +++ /dev/null @@ -1,857 +0,0 @@ -:chap_num: 8 -:prev_link: 07_elife -:next_link: 09_regexp -:load_files: ["code/chapter/08_error.js"] - -= Bugs and Error Handling = - -[chapterquote="true"] -[quote, Brian Kernighan and P.J. Plauger, The Elements of Programming Style] -____ -Debugging is -twice as hard as writing the code in the first place. Therefore, if -you write the code as cleverly as possible, you are, by definition, -not smart enough to debug it. -____ - -ifdef::interactive_target[] - -[chapterquote="true"] -[quote, Master Yuan-Ma, The Book of Programming] -____ -Yuan-Ma had written a small program that used many global variables -and shoddy shortcuts. Reading it, a student asked, ‘You warned us -against these techniques, yet I find them in your program. How can -this be?’ The master said, ‘There is no need to fetch a water hose -when the house is not on fire.’ -____ - -endif::interactive_target[] - -(((Kernighan+++,+++ Brian)))(((Plauger+++,+++ P.J.)))(((debugging)))(((error handling)))A program is crystallized thought. -Sometimes those thoughts are confused. Other times, mistakes are -introduced when converting thought into code. Either way, the result -is a flawed program. - -(((input)))(((output)))Flaws in a program are usually called ((bug))s. -Bugs can be programmer errors or problems in other systems that the -program interacts with. Some bugs are immediately apparent, while -others are subtle and might remain hidden in a system for years. - -Often, problems surface only when a program encounters a situation -that the programmer didn't originally consider. Sometimes such -situations are unavoidable. When the user is asked to input their age -and types _orange_, this puts our program in a difficult position. The -situation has to be anticipated and handled somehow.
- -== Programmer mistakes == - -(((parsing)))(((analysis)))When it comes to programmer mistakes, our -aim is simple. We want to find them and fix them. Such mistakes can -range from simple ((typo))s that cause the computer to complain as -soon as it lays eyes on our program to subtle mistakes in our -understanding of the way the program operates, causing incorrect -outcomes only in specific situations. Bugs of the latter type can -take weeks to diagnose. - -(((programming language)))(((type)))(((static typing)))(((dynamic -typing)))(((run-time error)))(((error)))The degree to which languages -help you find such mistakes varies. Unsurprisingly, JavaScript is at -the “hardly helps at all” end of that scale. Some languages want to -know the types of all your variables and expressions before even -running a program and will tell you right away when a type is used in -an inconsistent way. JavaScript considers types only when actually -running the program, and even then, it allows you to do some clearly -nonsensical things without complaint, such as `x = true * "monkey"`. - -(((syntax)))There are some things that JavaScript does complain about, -though. Writing a program that is not syntactically valid will -immediately trigger an error. Other things, such as calling something -that's not a function or looking up a ((property)) on an ((undefined)) -value, will cause an error to be reported when the program is running -and encounters the nonsensical action. - -(((NaN)))(((error)))But often, your nonsense computation will simply -produce a `NaN` (not a number) or undefined value. And the program -happily continues, convinced that it's doing something meaningful. The -mistake will manifest itself only later, after the bogus value has -traveled through several functions. It might not trigger an error at -all but silently cause the program's output to be wrong. Finding the -source of such problems can be difficult. 
- -(((debugging)))The process of finding mistakes—bugs—in programs is -called _debugging_. - -== Strict mode == - -indexsee:[use strict,strict mode] -(((strict mode)))(((syntax)))(((function)))JavaScript can be made a -_little_ more strict by enabling _strict mode_. This is done by -putting the string `"use strict"` at the top of a file or a function -body. Here's an example: - -// test: error "ReferenceError: counter is not defined" - -[source,javascript] ----- -function canYouSpotTheProblem() { - "use strict"; - for (counter = 0; counter < 10; counter++) - console.log("Happy happy"); -} - -canYouSpotTheProblem(); -// → ReferenceError: counter is not defined ----- - -(((var keyword)))(((variable,global)))Normally, when you forget to put -`var` in front of your variable, as with `counter` in the example, -JavaScript quietly creates a global variable and uses that. In strict -mode, however, an ((error)) is reported instead. This is very helpful. -It should be noted, though, that this doesn't work when the variable -in question already exists as a global variable, but only when -assigning to it would have created it. - -(((this)))(((global object)))(((undefined)))(((strict mode)))Another -change in strict mode is that the `this` binding holds the value -`undefined` in functions that are not called as ((method))s. When -making such a call outside of strict mode, `this` refers to the global -scope object. So if you accidentally call a method or constructor -incorrectly in strict mode, JavaScript will produce an error as soon -as it tries to read something from `this`, rather than happily working -with the global object, creating and reading global variables. 
- -For example, consider the following code, which calls a -((constructor)) without the `new` keyword so that its `this` will -_not_ refer to a newly constructed object: - -[source,javascript] ----- -function Person(name) { this.name = name; } -var ferdinand = Person("Ferdinand"); // oops -console.log(name); -// → Ferdinand ----- - -(((error)))So the bogus call to `Person` succeeded but returned an -undefined value and created the global variable `name`. In strict -mode, the result is different. - -// test: error "TypeError: Cannot set property 'name' of undefined" - -[source,javascript] ----- -"use strict"; -function Person(name) { this.name = name; } -// Oops, forgot 'new' -var ferdinand = Person("Ferdinand"); -// → TypeError: Cannot set property 'name' of undefined ----- - -We are immediately told that something is wrong. This is helpful. - -(((parameter)))(((variable,naming)))(((with statement)))Strict mode -does a few more things. It disallows giving a function multiple -parameters with the same name and removes certain problematic -language features entirely (such as the `with` statement, which is so -misguided it is not further discussed in this book). - -(((debugging)))In short, putting a `"use strict"` at the top of your -program rarely hurts and might help you spot a problem. - -== Testing == - -(((test suite)))(((run-time error)))If the language is not going to do -much to help us find mistakes, we'll have to find them the hard way: -by running the program and seeing whether it does the right thing. - -Doing this by hand, again and again, is a sure way to drive yourself -insane. Fortunately, it is often possible to write a second program -that automates testing your actual program. - -(((Vector type)))As an example, we once again use the `Vector` type. 
- -// include_code - -[source,javascript] ----- -function Vector(x, y) { - this.x = x; - this.y = y; -} -Vector.prototype.plus = function(other) { - return new Vector(this.x + other.x, this.y + other.y); -}; ----- - -We will write a program to check that our implementation of `Vector` -works as intended. Then, every time we change the implementation, we -follow up by running the test program so that we can be reasonably -confident that we didn't break anything. When we add extra -functionality (for example, a new method) to the `Vector` type, we also -add tests for the new feature. - -[source,javascript] ----- -function testVector() { - var p1 = new Vector(10, 20); - var p2 = new Vector(-10, 5); - var p3 = p1.plus(p2); - - if (p1.x !== 10) return "fail: x property"; - if (p1.y !== 20) return "fail: y property"; - if (p2.x !== -10) return "fail: negative x property"; - if (p3.x !== 0) return "fail: x from plus"; - if (p3.y !== 25) return "fail: y from plus"; - return "everything ok"; -} -console.log(testVector()); -// → everything ok ----- - -(((test suite)))(((testing framework)))(((domain-specific -language)))Writing tests like this tends to produce rather repetitive, -awkward code. Fortunately, there exist pieces of software that help -you build and run collections of tests (_test suites_) by providing a -language (in the form of functions and methods) suited to expressing -tests and by outputting informative messages when a test fails. These -are called _testing frameworks_. - -== Debugging == - -(((debugging)))Once you notice that there is something wrong with your -program because it misbehaves or produces errors, the next step is to -figure out _what_ the problem is. - -Sometimes it is obvious. The ((error)) message will point at a -specific line of your program, and if you look at the error -description and that line of code, you can often see the problem. - -(((run-time error)))But not always.
Sometimes the line that triggered -the problem is simply the first place where a bogus value produced -elsewhere gets used in an invalid way. And sometimes there is no error -message at all—just an invalid result. If you have been solving the -((exercises)) in the earlier chapters, you will probably have already -experienced such situations. - -(((decimal number)))(((binary number)))The following example program -tries to convert a whole number to a string in any base (decimal, -binary, and so on) by repeatedly picking out the last ((digit)) and then -dividing the number to get rid of this digit. But the insane output -that it currently produces suggests that it has a ((bug)). - -[source,javascript] ----- -function numberToString(n, base) { - var result = "", sign = ""; - if (n < 0) { - sign = "-"; - n = -n; - } - do { - result = String(n % base) + result; - n /= base; - } while (n > 0); - return sign + result; -} -console.log(numberToString(13, 10)); -// → 1.5e-3231.3e-3221.3e-3211.3e-3201.3e-3191.3e-3181.3… ----- - -(((analysis)))Even if you see the problem already, pretend for a -moment that you don't. We know that our program is malfunctioning, and -we want to find out why. - -(((trial and error)))This is where you must resist the urge to start -making random changes to the code. Instead, _think_. Analyze what is -happening and come up with a ((theory)) of why it might be happening. -Then, make additional observations to test this theory—or, if you -don't yet have a theory, make additional observations that might help -you come up with one. - -(((console.log)))(((output)))(((debugging)))(((logging)))Putting a few -strategic `console.log` calls into the program is a good way to get -additional information about what the program is doing. In this case, -we want `n` to take the values `13`, `1`, and then `0`. Let's write -out its value at the start of the loop. - ----- -13 -1.3 -0.13 -0.013 -… -1.5e-323 ----- - -(((rounding)))_Right_. 
Dividing 13 by 10 does not produce a whole -number. Instead of `n /= base`, what we actually want is `n = -Math.floor(n / base)` so that the number is properly “shifted” to the -right. - -(((JavaScript console)))(((breakpoint)))(((debugger statement)))An -alternative to using `console.log` is to use the _debugger_ -capabilities of your browser. Modern browsers come with the ability to -set a _breakpoint_ on a specific line of your code. This will cause -the execution of the program to pause every time the line with the -breakpoint is reached and allow you to inspect the values of -variables at that point. I won't go into details here since debuggers -differ from browser to browser, but look in your browser's developer -tools and search the Web for more information. Another way to set a -breakpoint is to include a `debugger` statement (consisting of simply -that keyword) in your program. If the ((developer tools)) of your -browser are active, the program will pause whenever it reaches that -statement, and you will be able to inspect its state. - -== Error propagation == - -(((input)))(((output)))(((run-time -error)))(((error)))(((validation)))Not all problems can be prevented -by the programmer, unfortunately. If your program communicates with -the outside world in any way, there is a chance that the input it gets -will be invalid or that other systems that it tries to talk to are -broken or unreachable. - -(((error recovery)))Simple programs, or programs that run only under -your supervision, can afford to just give up when such a problem -occurs. You'll look into the problem and try again. “Real” -applications, on the other hand, are expected to not simply crash. -Sometimes the right thing to do is take the bad input in stride and -continue running. In other cases, it is better to report to the user -what went wrong and then give up. But in either situation, the program -has to actively do something in response to the problem. 
- -(((promptInteger function)))(((validation)))Say you have a function -`promptInteger` that asks the user for a whole number and returns it. -What should it return if the user inputs _orange_? - -(((null)))(((undefined)))(((return value)))(((special return -value)))One option is to make it return a special value. Common -choices for such values are `null` and `undefined`. - -// test: no - -[source,javascript] ----- -function promptNumber(question) { - var result = Number(prompt(question, "")); - if (isNaN(result)) return null; - else return result; -} - -console.log(promptNumber("How many trees do you see?")); ----- - -This is a sound strategy. Now any code that calls `promptNumber` must -check whether an actual number was read and, failing that, must -somehow recover—maybe by asking again or by filling in a default -value. Or it could again return a special value to _its_ caller to -indicate that it failed to do what it was asked. - -(((error handling)))In many situations, mostly when ((error))s are -common and the caller should be explicitly taking them into account, -returning a special value is a perfectly fine way to indicate an -error. It does, however, have its downsides. First, what if the -function can already return every possible kind of value? For such a -function, it is hard to find a special value that can be distinguished -from a valid result. - -(((special return value)))(((readability)))The second issue with -returning special values is that it can lead to some very cluttered -code. If a piece of code calls `promptNumber` 10 times, it has to -check 10 times whether `null` was returned. And if its response to -finding `null` is to simply return `null` itself, the caller will in -turn have to check for it, and so on. - -== Exceptions == - -(((error handling)))When a function cannot proceed normally, what we -would _like_ to do is just stop what we are doing and immediately jump -back to a place that knows how to handle the problem. 
This is what -_((exception handling))_ does. - -(((control flow)))(((raising (exception))))(((throw keyword)))(((call -stack)))Exceptions are a mechanism that make it possible for code that -runs into a problem to _raise_ (or _throw_) an exception, which is -simply a value. Raising an exception somewhat resembles a -super-charged return from a function: it jumps out of not just the -current function but also out of its callers, all the way down to the -first call that started the current execution. This is called -_((unwinding the stack))_. You may remember the stack of function -calls that was mentioned in link:03_functions.html#stack[Chapter 3]. -An exception zooms down this stack, throwing away all the call -contexts it encounters. - -(((error handling)))(((syntax)))(((catch keyword)))If exceptions -always zoomed right down to the bottom of the stack, they would not be -of much use. They would just provide a novel way to blow up your -program. Their power lies in the fact that you can set “obstacles” -along the stack to _catch_ the exception as it is zooming down. Then -you can do something with it, after which the program continues -running at the point where the exception was caught. - -Here's an example: - -[[look]] -[source,javascript] ----- -function promptDirection(question) { - var result = prompt(question, ""); - if (result.toLowerCase() == "left") return "L"; - if (result.toLowerCase() == "right") return "R"; - throw new Error("Invalid direction: " + result); -} - -function look() { - if (promptDirection("Which way?") == "L") - return "a house"; - else - return "two angry bears"; -} - -try { - console.log("You see", look()); -} catch (error) { - console.log("Something went wrong: " + error); -} ----- - -(((exception handling)))(((block)))(((throw keyword)))(((try -keyword)))(((catch keyword)))The `throw` keyword is used to raise an -exception. Catching one is done by wrapping a piece of code in a `try` -block, followed by the keyword `catch`. 
When the code in the `try` -block causes an exception to be raised, the `catch` block is -evaluated. The variable name (in parentheses) after `catch` will be -bound to the exception value. After the `catch` block finishes—or if -the `try` block finishes without problems—control proceeds beneath the -entire `try/catch` statement. - -(((debugging)))(((call stack)))(((Error type)))(((stack -trace)))In this case, we used the `Error` ((constructor)) to create -our exception value. This is a ((standard)) JavaScript constructor -that creates an object with a `message` property. In modern JavaScript -environments, instances of this constructor also gather information -about the call stack that existed when the exception was created, a -so-called _stack trace_. This information is stored in the `stack` -property and can be helpful when trying to debug a problem: it -tells us the precise function where the problem occurred and which -other functions led up to the call that failed. - -(((exception handling)))Note that the function `look` completely -ignores the possibility that `promptDirection` might go wrong. This is -the big advantage of exceptions—error-handling code is necessary only -at the point where the error occurs and at the point where it is -handled. The functions in between can forget all about it. - -Well, almost... - -== Cleaning up after exceptions == - -(((exception handling)))(((cleaning up)))(((withContext -function)))(((dynamic scope)))Consider the following situation: a -function, `withContext`, wants to make sure that, during its -execution, the top-level variable `context` holds a specific context -value. After it finishes, it restores this variable to its old value. - -// include_code - -[source,javascript] ----- -var context = null; - -function withContext(newContext, body) { - var oldContext = context; - context = newContext; - var result = body(); - context = oldContext; - return result; -} ----- - -What if `body` raises an exception? 
In that case, the call to -`withContext` will be thrown off the stack by the exception, and -`context` will never be set back to its old value. - -(((block)))(((try keyword)))(((finally keyword)))There is one more -feature that `try` statements have. They may be followed by a -`finally` block either instead of or in addition to a `catch` -block. A `finally` block means “No matter _what_ happens, run this -code after trying to run the code in the `try` block”. If a function -has to clean something up, the cleanup code should usually be put into -a `finally` block. - -// include_code - -[source,javascript] ----- -function withContext(newContext, body) { - var oldContext = context; - context = newContext; - try { - return body(); - } finally { - context = oldContext; - } -} ----- - -(((withContext function)))Note that we no longer have to store the -result of `body` (which we want to return) in a variable. Even if we -return directly from the `try` block, the `finally` block will be run. -Now we can do this and be safe: - -// test: no - -[source,javascript] ----- -try { - withContext(5, function() { - if (context < 10) - throw new Error("Not enough context!"); - }); -} catch (e) { - console.log("Ignoring: " + e); -} -// → Ignoring: Error: Not enough context! - -console.log(context); -// → null ----- - -Even though the function called from `withContext` exploded, -`withContext` itself still properly cleaned up the `context` variable. - -== Selective catching == - -(((uncaught exception)))(((exception handling)))(((JavaScript -console)))(((developer tools)))(((call stack)))(((error)))When an -exception makes it all the way to the bottom of the stack without -being caught, it gets handled by the environment. What this means -differs between environments. In browsers, a description of the error -typically gets written to the JavaScript console (reachable through -the browser's Tools or Developer menu). 
- -(((crash)))(((error handling)))For programmer mistakes or problems -that the program cannot possibly handle, just letting the error go -through is often okay. An unhandled exception is a reasonable way to -signal a broken program, and the JavaScript console will, on modern -browsers, provide you with some information about which function calls -were on the stack when the problem occurred. - -(((user interface)))For problems that are _expected_ to happen during -routine use, crashing with an unhandled exception is not a very -friendly response. - -(((syntax)))(((function,application)))(((exception handling)))(((Error -type)))Invalid uses of the language, such as referencing a nonexistent -((variable)), looking up a property on `null`, or calling something -that's not a function, will also result in exceptions being raised. -Such exceptions can be caught just like your own exceptions. - -(((catch keyword)))When a `catch` body is entered, all we know is that -_something_ in our `try` body caused an exception. But we don't know -_what_, or _which_ exception it caused. - -(((exception handling)))JavaScript (in a rather glaring omission) -doesn't provide direct support for selectively catching exceptions: -either you catch them all or you don't catch any. This makes it very -easy to _assume_ that the exception you get is the one you were -thinking about when you wrote the `catch` block. - -(((promptDirection function)))But it might not be. Some other -((assumption)) might be violated, or you might have introduced a bug -somewhere that is causing an exception. Here is an example, which -_attempts_ to keep on calling `promptDirection` until it gets a valid -answer: - -// test: no - -[source,javascript] ----- -for (;;) { - try { - var dir = promtDirection("Where?"); // ← typo! - console.log("You chose ", dir); - break; - } catch (e) { - console.log("Not a valid direction. 
Try again."); - } -} ----- - -(((infinite loop)))(((for loop)))(((catch keyword)))(((debugging)))The -`for (;;)` construct is a way to intentionally create a loop that -doesn't terminate on its own. We break out of the loop only when a -valid direction is given. _But_ we misspelled `promptDirection`, -which will result in an “undefined variable” error. Because the -`catch` block completely ignores its exception value (`e`), assuming -it knows what the problem is, it wrongly treats the variable error as -indicating bad input. Not only does this cause an infinite loop, but -it also “buries” the useful error message about the misspelled -variable. - -As a general rule, don't blanket-catch exceptions unless it is for the -purpose of “routing” them somewhere—for example, over the network to -tell another system that our program crashed. And even then, think -carefully about how you might be hiding information. - -(((exception handling)))So we want to catch a _specific_ kind of -exception. We can do this by checking in the `catch` block whether the -exception we got is the one we are interested in and by rethrowing it -otherwise. But how do we recognize an exception? - -Of course, we could match its `message` property against the ((error)) -message we happen to expect. But that's a shaky way to write code—we'd -be using information that's intended for human consumption (the -message) to make a programmatic decision. As soon as someone changes -(or translates) the message, the code will stop working. - -(((Error type)))(((instanceof operator)))Rather, let's define a new -type of error and use `instanceof` to identify it. 
- -// include_code - -[source,javascript] ----- -function InputError(message) { - this.message = message; - this.stack = (new Error()).stack; -} -InputError.prototype = Object.create(Error.prototype); -InputError.prototype.name = "InputError"; ----- - -(((throw keyword)))(((inheritance)))The prototype is made to derive -from `Error.prototype` so that `instanceof Error` will also return -true for `InputError` objects. It's also given a `name` property -since the standard error types (`Error`, `SyntaxError`, -`ReferenceError`, and so on) also have such a property. - -(((call stack)))The assignment to the `stack` property tries to give -this object a somewhat useful ((stack trace)), on platforms that -support it, by creating a regular error object and then using that -object's `stack` property as its own. - -(((promptDirection function)))Now `promptDirection` can throw such an -error. - -// include_code - -[source,javascript] ----- -function promptDirection(question) { - var result = prompt(question, ""); - if (result.toLowerCase() == "left") return "L"; - if (result.toLowerCase() == "right") return "R"; - throw new InputError("Invalid direction: " + result); -} ----- - -(((exception handling)))And the loop can catch it more carefully. - -// test: no - -[source,javascript] ----- -for (;;) { - try { - var dir = promptDirection("Where?"); - console.log("You chose ", dir); - break; - } catch (e) { - if (e instanceof InputError) - console.log("Not a valid direction. Try again."); - else - throw e; - } -} ----- - -(((debugging)))This will catch only instances of `InputError` and let -unrelated exceptions through. If you reintroduce the typo, the -undefined variable error will be properly reported. - -== Assertions == - -(((assert function)))(((assertion)))(((debugging)))_Assertions_ are a -tool to do basic sanity checking for programmer errors. 
Consider this -helper function, `assert`: - -[source,javascript] ----- -function AssertionFailed(message) { - this.message = message; -} -AssertionFailed.prototype = Object.create(Error.prototype); - -function assert(test, message) { - if (!test) - throw new AssertionFailed(message); -} - -function lastElement(array) { - assert(array.length > 0, "empty array in lastElement"); - return array[array.length - 1]; -} ----- - -(((validation)))(((run-time -error)))(((crash)))(((assumption)))(((array)))This provides a -compact way to enforce expectations, helpfully blowing up the program -if the stated condition does not hold. For instance, the `lastElement` -function, which fetches the last element from an array, would return -`undefined` on empty arrays if the assertion was omitted. Fetching the -last element from an empty array does not make much sense, so it is -almost certainly a programmer error to do so. - -(((assertion)))(((debugging)))Assertions are a way to make sure -mistakes cause failures at the point of the mistake, rather than -silently producing nonsense values that may go on to cause trouble in -an unrelated part of the system. - -== Summary == - -Mistakes and bad input are facts of life. Bugs in programs need to be -found and fixed. They can become easier to notice by having automated -test suites and adding assertions to your programs. - -Problems caused by factors outside the program's control should -usually be handled gracefully. Sometimes, when the problem can be -handled locally, special return values are a sane way to track them. -Otherwise, exceptions are preferable. - -Throwing an exception causes the call stack to be unwound until the -next enclosing `try/catch` block or until the bottom of the stack. -The exception value will be given to the `catch` block that catches -it, which should verify that it is actually the expected kind of -exception and then do something with it. 
To deal with the -unpredictable control flow caused by exceptions, `finally` blocks can -be used to ensure a piece of code is _always_ run when a block -finishes. - -== Exercises == - -=== Retry === - -(((primitiveMultiply (exercise))))(((exception handling)))(((throw -keyword)))Say you have a function `primitiveMultiply` that, in 50 percent of -cases, multiplies two numbers, and in the other 50 percent, raises an -exception of type `MultiplicatorUnitFailure`. Write a function that -wraps this clunky function and just keeps trying until a call -succeeds, after which it returns the result. - -(((catch keyword)))Make sure you handle only the exceptions you -are trying to handle. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -function MultiplicatorUnitFailure() {} - -function primitiveMultiply(a, b) { - if (Math.random() < 0.5) - return a * b; - else - throw new MultiplicatorUnitFailure(); -} - -function reliableMultiply(a, b) { - // Your code here. -} - -console.log(reliableMultiply(8, 8)); -// → 64 ----- -endif::interactive_target[] - -!!hint!! - -(((primitiveMultiply (exercise))))(((try keyword)))(((catch -keyword)))(((throw keyword)))The call to `primitiveMultiply` should -obviously happen in a `try` block. The corresponding `catch` block -should rethrow the exception when it is not an instance of -`MultiplicatorUnitFailure` and ensure the call is retried when it is. - -To do the retrying, you can either use a loop that breaks only when a -call succeeds—as in the link:08_error.html#look[`look` example] -earlier in this chapter—or use ((recursion)) and hope you don't get a -string of failures so long that it overflows the stack (which is a -pretty safe bet). - -!!hint!! 
- -=== The locked box === - -(((locked box (exercise))))Consider the following (rather contrived) -object: - -// include_code - -[source,javascript] ----- -var box = { - locked: true, - unlock: function() { this.locked = false; }, - lock: function() { this.locked = true; }, - _content: [], - get content() { - if (this.locked) throw new Error("Locked!"); - return this._content; - } -}; ----- - -(((private property)))(((access control)))It is a ((box)) with a -lock. Inside is an array, but you can get at it only when the box is -unlocked. Directly accessing the `_content` property is not allowed. - -(((finally keyword)))(((exception handling)))Write a function called -`withBoxUnlocked` that takes a function value as argument, unlocks the -box, runs the function, and then ensures that the box is locked again -before returning, regardless of whether the argument function returned -normally or threw an exception. - -ifdef::interactive_target[] - -[source,javascript] ----- -function withBoxUnlocked(body) { - // Your code here. -} - -withBoxUnlocked(function() { - box.content.push("gold piece"); -}); - -try { - withBoxUnlocked(function() { - throw new Error("Pirates on the horizon! Abort!"); - }); -} catch (e) { - console.log("Error raised:", e); -} -console.log(box.locked); -// → true ----- - -For extra points, make sure that if you call `withBoxUnlocked` when -the box is already unlocked, the box stays unlocked. - -endif::interactive_target[] - -!!hint!! - -(((locked box (exercise))))(((finally keyword)))(((try keyword)))This -exercise calls for a `finally` block, as you probably guessed. Your -function should first unlock the box and then call the argument function -from inside a `try` body. The `finally` block after it should lock the -box again. - -To make sure we don't lock the box when it wasn't already locked, -check its lock at the start of the function and unlock and lock -it only when it started out locked. - -!!hint!! 
diff --git a/09_regexp.txt b/09_regexp.txt deleted file mode 100644 index 5f0ea1414..000000000 --- a/09_regexp.txt +++ /dev/null @@ -1,1323 +0,0 @@ -:chap_num: 9 -:prev_link: 08_error -:next_link: 10_modules - -= Regular Expressions = - -[chapterquote="true"] -[quote,Jamie Zawinski] -____ -Some people, when confronted with a -problem, think ‘I know, I'll use regular expressions.’ Now they have -two problems. -____ - -ifdef::interactive_target[] - -[chapterquote="true"] -[quote, Master Yuan-Ma, The Book of Programming] -____ -Yuan-Ma said, ‘When you cut against the grain of the wood, much -strength is needed. When you program against the grain of a problem, -much code is needed.’ -____ - -endif::interactive_target[] - -(((Zawinski+++,+++ -Jamie)))(((evolution)))(((adoption)))(((integration)))Programming -((tool))s and techniques survive and spread in a chaotic, evolutionary -way. It's not always the pretty or brilliant ones that win but rather -the ones that function well enough within the right niche—for example, -by being integrated with another successful piece of technology. - -(((domain-specific language)))In this chapter, I will discuss one such -tool, _((regular expression))s_. Regular expressions are a way to -describe ((pattern))s in string data. They form a small, separate -language that is part of JavaScript and many other languages and -tools. - -(((interface,design)))Regular expressions are both terribly awkward -and extremely useful. Their syntax is cryptic, and the programming -((interface)) JavaScript provides for them is clumsy. But they are a -powerful ((tool)) for inspecting and processing strings. Properly -understanding regular expressions will make you a more effective -programmer. - -== Creating a regular expression == - -(((regular expression,creation)))(((RegExp constructor)))(((literal -expression)))(((slash character)))A regular expression is a type of -object. 
It can either be constructed with the `RegExp` constructor or -written as a literal value by enclosing the pattern in forward slash -(`/`) characters. - -[source,javascript] ----- -var re1 = new RegExp("abc"); -var re2 = /abc/; ----- - -Both of these regular expression objects represent the same -((pattern)): an _a_ character followed by a _b_ followed by a _c_. - -(((backslash character)))(((RegExp constructor)))When using the -`RegExp` constructor, the pattern is written as a normal string, so -the usual rules apply for backslashes. - -(((regular expression,escaping)))(((escaping,in regexps)))(((slash -character)))The second notation, where the pattern appears between -slash characters, treats backslashes somewhat differently. First, -since a forward slash ends the pattern, we need to put a backslash -before any forward slash that we want to be _part_ of the pattern. In -addition, backslashes that aren't part of special character codes -(like `\n`) will be _preserved_, rather than ignored as they are in -strings, and change the meaning of the pattern. Some characters, such -as question marks and plus signs, have special meanings in regular -expressions and must be preceded by a backslash if they are meant to -represent the character itself. - -[source,javascript] ----- -var eighteenPlus = /eighteen\+/; ----- - -Knowing precisely what characters to backslash-escape when writing -regular expressions requires you to know every character with a -special meaning. For the time being, this may not be realistic, so -when in doubt, just put a backslash before any character that is not a -letter, number, or ((whitespace)). - -== Testing for matches == - -(((matching)))(((test method)))(((regular expression,methods)))Regular -expression objects have a number of methods. The simplest one is -`test`. If you pass it a string, it will return a ((Boolean)) telling -you whether the string contains a match of the pattern in the -expression. 
- -[source,javascript] ----- -console.log(/abc/.test("abcde")); -// → true -console.log(/abc/.test("abxde")); -// → false ----- - -(((pattern)))A ((regular expression)) consisting of only nonspecial -characters simply represents that sequence of characters. If _abc_ -occurs anywhere in the string we are testing against (not just at the -start), `test` will return `true`. - -== Matching a set of characters == - -(((regular expression)))(((indexOf method)))Finding out whether a -string contains _abc_ could just as well be done with a call to -`indexOf`. Regular expressions allow us to go beyond that and express -more complicated ((pattern))s. - -Say we want to match any ((number)). In a regular expression, putting -a ((set)) of characters between square brackets makes that part of the -expression match any of the characters between the brackets. - -Both of the following expressions match all strings that contain a ((digit)): - -[source,javascript] ----- -console.log(/[0123456789]/.test("in 1992")); -// → true -console.log(/[0-9]/.test("in 1992")); -// → true ----- - -(((dash character)))Within square brackets, a dash (`-`) between two -characters can be used to indicate a ((range)) of characters, where -the ordering is determined by the character's ((Unicode)) number. -Characters 0 to 9 sit right next to each other in this ordering -(codes 48 to 57), so `[0-9]` covers all of them and matches any -((digit)). - -(((whitespace)))(((alphanumeric character)))(((period -character)))There are a number of common character groups that have -their own built-in shortcuts. Digits are one of them: `\d` means the -same thing as `[0-9]`. 
- -[cols="1,5"] -|==== -|`\d` |Any ((digit)) character -|`\w` |An alphanumeric character (“((word character))”) -|`\s` |Any ((whitespace)) character (space, tab, newline, and similar) -|`\D` |A character that is _not_ a digit -|`\W` |A nonalphanumeric character -|`\S` |A nonwhitespace character -|`.` |Any character except for newline(((newline character))) -|==== - -So you could match a ((date)) and ((time)) format like 30-01-2003 -15:20 with the following expression: - -[source,javascript] ----- -var dateTime = /\d\d-\d\d-\d\d\d\d \d\d:\d\d/; -console.log(dateTime.test("30-01-2003 15:20")); -// → true -console.log(dateTime.test("30-jan-2003 15:20")); -// → false ----- - -(((backslash character)))That looks completely awful, doesn't it? It has way too -many backslashes, producing background noise that makes it hard to -spot the actual ((pattern)) expressed. We'll see a slightly improved -version of this expression -link:09_regexp.html#date_regexp_counted[later]. - -(((escaping,in regexps)))(((regular expression)))(((set)))These -backslash codes can also be used inside ((square brackets)). For -example, `[\d.]` means any digit or a period character. But note that -the period itself, when used between square brackets, loses its -special meaning. The same goes for other special characters, such as -`+`. - -(((square brackets)))(((inversion)))(((caret character)))To _invert_ a -set of characters—that is, to express that you want to match any -character _except_ the ones in the set—you can write a caret (`^`) -character after the opening bracket. - -[source,javascript] ----- -var notBinary = /[^01]/; -console.log(notBinary.test("1100100010100110")); -// → false -console.log(notBinary.test("1100100010200110")); -// → true ----- - -== Repeating parts of a pattern == - -(((regular expression,repetition)))We now know how to match a single digit. What -if we want to match a whole number—a ((sequence)) of one or more -((digit))s? 
- -(((plus character)))(((repetition)))(((+ operator)))When you put a -plus sign (`+`) after something in a regular expression, it indicates -that the element may be repeated more than once. Thus, `/\d+/` matches -one or more digit characters. - -[source,javascript] ----- -console.log(/'\d+'/.test("'123'")); -// → true -console.log(/'\d+'/.test("''")); -// → false -console.log(/'\d*'/.test("'123'")); -// → true -console.log(/'\d*'/.test("''")); -// → true ----- - -(((pass:[*] operator)))(((asterisk)))The star (`*`) has a similar -meaning but also allows the pattern to match zero times. Something -with a star after it never prevents a pattern from matching—it'll just -match zero instances if it can't find any suitable text to match. - -(((British English)))(((American English)))(((question mark)))A -question mark makes a part of a pattern “((optional))”, meaning it may -occur zero or one time. In the following example, the _u_ character -is allowed to occur, but the pattern also matches when it is missing. - -[source,javascript] ----- -var neighbor = /neighbou?r/; -console.log(neighbor.test("neighbour")); -// → true -console.log(neighbor.test("neighbor")); -// → true ----- - -(((repetition)))(((curly braces)))To indicate that a pattern should -occur a precise number of times, use curly braces. Putting `{4}` after -an element, for example, requires it to occur exactly four times. It -is also possible to specify a ((range)) this way: `{2,4}` means the -element must occur at least twice and at most four times. - -[[date_regexp_counted]] -Here is another version of the ((date)) and ((time)) pattern that -allows both single- and double-((digit)) days, months, and hours. It -is also slightly more readable. - -[source,javascript] ----- -var dateTime = /\d{1,2}-\d{1,2}-\d{4} \d{1,2}:\d{2}/; -console.log(dateTime.test("30-1-2003 8:45")); -// → true ----- - -You can also specify open-ended ((range))s when using ((curly braces)) -by omitting the number after the comma. 
So `{5,}` means five or more -times. - -== Grouping subexpressions == - -(((regular expression,grouping)))(((grouping)))To use an operator like `*` or -`+` on more than one element at a time, you can use ((parentheses)). A -part of a regular expression that is enclosed in parentheses counts -as a single element as far as the operators following it are -concerned. - -[source,javascript] ----- -var cartoonCrying = /boo+(hoo+)+/i; -console.log(cartoonCrying.test("Boohoooohoohooo")); -// → true ----- - -(((crying)))The first and second `+` characters apply only to the -second _o_ in _boo_ and _hoo_, respectively. The third `+` applies to -the whole group `(hoo+)`, matching one or more sequences like that. - -(((case sensitivity)))(((capitalization)))(((regular -expression,flags)))The `i` at the end of the expression in the -previous example makes this regular expression case insensitive, allowing it to -match the uppercase _B_ in the input string, even though the pattern -is itself all lowercase. - -== Matches and groups == - -(((regular expression,grouping)))(((exec method)))(((array)))The `test` method -is the absolute simplest way to match a regular expression. It -tells you only whether it matched and nothing else. Regular expressions -also have an `exec` (execute) method that will return `null` if no -match was found and return an object with information about the match -otherwise. - -[source,javascript] ----- -var match = /\d+/.exec("one two 100"); -console.log(match); -// → ["100"] -console.log(match.index); -// → 8 ----- - -(((index property)))(((string,indexing)))An object returned from -`exec` has an `index` property that tells us _where_ in the string the -successful match begins. Other than that, the object looks like (and -in fact is) an array of strings, whose first element is the string -that was matched—in the previous example, this is the sequence of -((digit))s that we were looking for. 
- -(((string,methods)))(((match method)))String values have a `match` -method that behaves similarly. - -[source,javascript] ----- -console.log("one two 100".match(/\d+/)); -// → ["100"] ----- - -(((grouping)))(((capture group)))(((exec method)))When the regular -expression contains subexpressions grouped with parentheses, the text -that matched those groups will also show up in the array. The whole -match is always the first element. The next element is the part -matched by the first group (the one whose opening parenthesis comes -first in the expression), then the second group, and so on. - -[source,javascript] ----- -var quotedText = /'([^']*)'/; -console.log(quotedText.exec("she said 'hello'")); -// → ["'hello'", "hello"] ----- - -(((capture group)))When a group does not end up being matched at all -(for example, when followed by a question mark), its position in the -output array will hold `undefined`. Similarly, when a group is matched -multiple times, only the last match ends up in the array. - -[source,javascript] ----- -console.log(/bad(ly)?/.exec("bad")); -// → ["bad", undefined] -console.log(/(\d)+/.exec("123")); -// → ["123", "3"] ----- - -(((exec method)))(((regular -expression,methods)))(((extraction)))Groups can be useful for -extracting parts of a string. If we don't just want to verify whether -a string contains a ((date)) but also extract it and construct an -object that represents it, we can wrap parentheses around the digit -patterns and directly pick the date out of the result of `exec`. - -But first, a brief detour, in which we discuss the preferred way to -store date and ((time)) values in JavaScript. - -== The date type == - -(((constructor)))(((Date constructor)))JavaScript has a standard -object type for representing ((date))s—or rather, points in ((time)). -It is called `Date`. If you simply create a date object using `new`, -you get the current date and time. 
- -// test: no - -[source,javascript] ----- -console.log(new Date()); -// → Wed Dec 04 2013 14:24:57 GMT+0100 (CET) ----- - -(((Date constructor)))You can also create an object for a specific -time. - -[source,javascript] ----- -console.log(new Date(2009, 11, 9)); -// → Wed Dec 09 2009 00:00:00 GMT+0100 (CET) -console.log(new Date(2009, 11, 9, 12, 59, 59, 999)); -// → Wed Dec 09 2009 12:59:59 GMT+0100 (CET) ----- - -(((zero-based counting)))(((interface,design)))JavaScript uses a -convention where month numbers start at zero (so December is 11), yet -day numbers start at one. This is confusing and silly. Be careful. - -The last four arguments (hours, minutes, seconds, and milliseconds) -are optional and taken to be zero when not given. - -(((getTime method)))Timestamps are stored as the number of -milliseconds since the start of 1970, using negative numbers for -times before 1970 (following a convention set by “((Unix time))”, -which was invented around that time). The `getTime` method on a date object -returns this number. It is big, as you can imagine. - -[source,javascript] ----- -console.log(new Date(2013, 11, 19).getTime()); -// → 1387407600000 -console.log(new Date(1387407600000)); -// → Thu Dec 19 2013 00:00:00 GMT+0100 (CET) ----- - -(((Date.now function)))(((Date constructor)))If you give the `Date` -constructor a single argument, that argument is treated as such -a millisecond count. You can get the current millisecond count by -creating a new `Date` object and calling `getTime` on it but also by -calling the `Date.now` function. - -(((getFullYear method)))(((getMonth method)))(((getDate -method)))(((getHours method)))(((getMinutes method)))(((getSeconds -method)))(((getYear method)))Date objects provide methods like -`getFullYear`, `getMonth`, `getDate`, `getHours`, `getMinutes`, and -`getSeconds` to extract their components. There's also `getYear`, -which gives you the rather useless year minus 1900 (such as `93` for 1993 or -`114` for 2014).
- -(((capture group)))Putting ((parentheses)) around the parts of the -expression that we are interested in, we can now easily create a date -object from a string. - -[source,javascript] ----- -function findDate(string) { - var dateTime = /(\d{1,2})-(\d{1,2})-(\d{4})/; - var match = dateTime.exec(string); - return new Date(Number(match[3]), - Number(match[2]) - 1, - Number(match[1])); -} -console.log(findDate("30-1-2003")); -// → Thu Jan 30 2003 00:00:00 GMT+0100 (CET) ----- - -== Word and string boundaries == - -(((matching)))(((regular expression,boundary)))Unfortunately, -`findDate` will also happily extract the nonsensical date 00-1-3000 -from the string `"100-1-30000"`. A match may happen anywhere in the -string, so in this case, it'll just start at the second character and -end at the second-to-last character. - -(((boundary)))(((caret character)))(((dollar sign)))If we want to -enforce that the match must span the whole string, we can add the -markers `^` and `$`. The caret matches the start of the input string, -while the dollar sign matches the end. So, `/^\d+$/` matches a string -consisting entirely of one or more digits, `/^!/` matches any string -that starts with an exclamation mark, and `/x^/` does not match any -string (there cannot be an _x_ before the start of the string). - -(((word boundary)))(((word character)))If, on the other hand, we just -want to make sure the date starts and ends on a word boundary, we can -use the marker `\b`. A word boundary can be the start or end of the -string or any point in the string that has a word character (as in -`\w`) on one side and a nonword character on the other. - -[source,javascript] ----- -console.log(/cat/.test("concatenate")); -// → true -console.log(/\bcat\b/.test("concatenate")); -// → false ----- - -(((matching)))Note that a boundary marker doesn't represent an actual -character. 
It just enforces that the regular expression matches only -when a certain condition holds at the place where it appears in the -pattern. - -== Choice patterns == - -(((branching)))(((regular expression,alternatives)))(((farm -example)))Say we want to know whether a piece of text contains not -only a number but a number followed by one of the words _pig_, _cow_, -or _chicken_, or any of their plural forms. - -We could write three regular expressions and test them in turn, but -there is a nicer way. The ((pipe character)) (`|`) denotes a -((choice)) between the pattern to its left and the pattern to its -right. So I can say this: - -[source,javascript] ----- -var animalCount = /\b\d+ (pig|cow|chicken)s?\b/; -console.log(animalCount.test("15 pigs")); -// → true -console.log(animalCount.test("15 pigchickens")); -// → false ----- - -(((parentheses)))Parentheses can be used to limit the part of the -pattern that the pipe operator applies to, and you can put multiple -such operators next to each other to express a choice between more -than two patterns. - -== The mechanics of matching == - -(((regular expression,matching)))(((matching,algorithm)))Regular -expressions can be thought of as ((flow diagram))s. This is the -diagram for the livestock expression in the previous example: - -image::img/re_pigchickens.svg[alt="Visualization of /\\b\\d+ (pig|cow|chicken)s?\\b/"] - -(((traversal)))Our expression matches a string if we can find a path -from the left side of the diagram to the right side. We keep -a current position in the string, and every time we move through a -box, we verify that the part of the string after our current position -matches that box. - -So if we try to match `"the 3 pigs"` with our regular expression, -our progress through the flow chart would look like this: - - - At position 4, there is a word ((boundary)), so we can move past - the first box. - - - Still at position 4, we find a digit, so we can also move past the - second box. 
- - - At position 5, one path loops back to before the second (digit) box, - while the other moves forward through the box that holds a single space - character. There is a space here, not a digit, so we must take the - second path. - - - We are now at position 6 (the start of “pigs”) and at the three-way - branch in the diagram. We don't see “cow” or “chicken” here, but we - do see “pig”, so we take that branch. - - - At position 9, after the three-way branch, one path skips - the _s_ box and goes straight to the final word boundary, while the other path - matches an _s_. There is an _s_ character here, not a word boundary, - so we go through the _s_ box. - - - We're at position 10 (the end of the string) and can match only a - word ((boundary)). The end of a string counts as a word boundary, - so we go through the last box and have successfully matched this - string. - -(((regular -expression,matching)))(((matching,algorithm)))(((searching)))Conceptually, -a regular expression engine looks for a match in a string as follows: -it starts at the start of the string and tries a match there. In this -case, there _is_ a word boundary there, so it'd get past the first -box—but there is no digit, so it'd fail at the second box. Then it -moves on to the second character in the string and tries to begin a -new match there... and so on, until it finds a match or reaches the end -of the string and decides that there really is no match. - -[[backtracking]] -== Backtracking == - -(((regular expression,backtracking)))(((binary number)))(((decimal -number)))(((hexadecimal number)))(((flow -diagram)))(((matching,algorithm)))(((backtracking)))The regular -expression `/\b([01]+b|\d+|[\da-f]+h)\b/` matches either a binary -number followed by a _b_, a regular decimal number with no suffix -character, or a hexadecimal number (that is, base 16, with the letters -_a_ to _f_ standing for the digits 10 to 15) followed by an _h_. 
This -is the corresponding diagram: - -image::img/re_number.svg[alt="Visualization of /\\b([01]+b|\\d+|[\\da-f]+h)\\b/"] - -(((branching)))When matching this expression, it will often happen -that the top (binary) branch is entered even though the input does not -actually contain a binary number. When matching the string `"103"`, -for example, it becomes clear only at the 3 that we are in the wrong -branch. The string _does_ match the expression, just not the branch we -are currently in. - -(((backtracking)))(((searching)))So the matcher _backtracks_. When -entering a branch, it remembers its current position (in this -case, at the start of the string, just past the first boundary box in -the diagram) so that it can go back and try another branch if the -current one does not work out. For the string `"103"`, after -encountering the 3 character, it will start trying the branch for -decimal numbers. This one matches, so a match is reported after all. - -(((matching,algorithm)))The matcher stops as soon as it finds a full -match. This means that if multiple branches could potentially match a -string, only the first one (ordered by where the branches appear in -the regular expression) is used. - -Backtracking also happens for ((repetition)) operators like + and `*`. -If you match `/^.*x/` against `"abcxe"`, the `.*` part will first try -to consume the whole string. The engine will then realize that it -needs an _x_ to match the pattern. Since there is no _x_ past the end -of the string, the star operator tries to match one character less. -But the matcher doesn't find an _x_ after `abcx` either, so it -backtracks again, matching the star operator to just `abc`. _Now_ it -finds an _x_ where it needs it and reports a successful match from -positions 0 to 4. - -(((performance)))(((complexity)))It is possible to write regular -expressions that will do a _lot_ of backtracking. This problem occurs -when a pattern can match a piece of input in many different ways. 
For -example, if we get confused while writing a binary-number regular expression, we -might accidentally write something like `/([01]+)+b/`. - -image::img/re_slow.svg[alt="Visualization of /([01]+)+b/",width="6cm"] - -(((inner loop)))(((nesting,in regexps)))If that tries to match some -long series of zeros and ones with no trailing _b_ character, the -matcher will first go through the inner loop until it runs out of -digits. Then it notices there is no _b_, so it backtracks one -position, goes through the outer loop once, and gives up again, trying -to backtrack out of the inner loop once more. It will continue to try -every possible route through these two loops. This means the amount of -work _doubles_ with each additional character. For even just a few -dozen characters, the resulting match will take practically forever. - -== The replace method == - -(((replace method)))(((regular expression)))String values have a -`replace` method, which can be used to replace part of the string -with another string. - -[source,javascript] ----- -console.log("papa".replace("p", "m")); -// → mapa ----- - -(((regular expression,flags)))(((regular expression,global)))The first -argument can also be a regular expression, in which case the first -match of the regular expression is replaced. When a `g` option (for -_global_) is added to the regular expression, _all_ matches in the -string will be replaced, not just the first. - -[source,javascript] ----- -console.log("Borobudur".replace(/[ou]/, "a")); -// → Barobudur -console.log("Borobudur".replace(/[ou]/g, "a")); -// → Barabadar ----- - -(((interface,design)))(((argument)))It would have been sensible if the -choice between replacing one match or all matches was made through an -additional argument to `replace` or by providing a different method, -`replaceAll`. But for some unfortunate reason, the choice relies on a -property of the regular expression instead. 
- -(((grouping)))(((capture group)))(((dollar sign)))(((replace -method)))(((regular expression,grouping)))The real power of using -regular expressions with `replace` comes from the fact that we can -refer back to matched groups in the replacement string. For example, -say we have a big string containing the names of people, one name per -line, in the format `Lastname, Firstname`. If we want to swap these -names and remove the comma to get a simple `Firstname Lastname` -format, we can use the following code: - -[source,javascript] ----- -console.log( - "Hopper, Grace\nMcCarthy, John\nRitchie, Dennis" - .replace(/([\w ]+), ([\w ]+)/g, "$2 $1")); -// → Grace Hopper -// John McCarthy -// Dennis Ritchie ----- - -The `$1` and `$2` in the replacement string refer to the parenthesized -groups in the pattern. `$1` is replaced by the text that matched -against the first group, `$2` by the second, and so on, up to `$9`. -The whole match can be referred to with `$&`. - -(((function,higher-order)))(((grouping)))(((capture group)))It is also -possible to pass a function, rather than a string, as the second -argument to `replace`. For each replacement, the function will be -called with the matched groups (as well as the whole match) as -arguments, and its return value will be inserted into the new string. 
- -Here's a simple example: - -[source,javascript] ----- -var s = "the cia and fbi"; -console.log(s.replace(/\b(fbi|cia)\b/g, function(str) { - return str.toUpperCase(); -})); -// → the CIA and FBI ----- - -And here's a more interesting one: - -[source,javascript] ----- -var stock = "1 lemon, 2 cabbages, and 101 eggs"; -function minusOne(match, amount, unit) { - amount = Number(amount) - 1; - if (amount == 1) // only one left, remove the 's' - unit = unit.slice(0, unit.length - 1); - else if (amount == 0) - amount = "no"; - return amount + " " + unit; -} -console.log(stock.replace(/(\d+) (\w+)/g, minusOne)); -// → no lemon, 1 cabbage, and 100 eggs ----- - -This takes a string, finds all occurrences of a number followed by an -alphanumeric word, and returns a string wherein every such occurrence -is decremented by one. - -The `(\d+)` group ends up as the `amount` argument to the function, -and the `(\w+)` group gets bound to `unit`. The function converts -`amount` to a number—which always works, since it matched `\d+`—and -makes some adjustments in case there is only one or zero left. - -== Greed == - -(((greed)))(((regular expression)))It isn't hard to use `replace` to -write a function that removes all ((comment))s from a piece of -JavaScript ((code)). Here is a first attempt: - -// test: wrap - -[source,javascript] ----- -function stripComments(code) { - return code.replace(/\/\/.*|\/\*[^]*\*\//g, ""); -} -console.log(stripComments("1 + /* 2 */3")); -// → 1 + 3 -console.log(stripComments("x = 10;// ten!")); -// → x = 10; -console.log(stripComments("1 /* a */+/* b */ 1")); -// → 1 1 ----- - -(((period character)))(((slash character)))(((newline -character)))(((empty set)))(((block comment)))(((line comment)))The -part before the _or_ operator simply matches two slash characters -followed by any number of non-newline characters. The part for -multiline comments is more involved. 
We use `[^]` (any character that -is not in the empty set of characters) as a way to match any -character. We cannot just use a dot here because block comments can -continue on a new line, and dots do not match the newline character. - -But the output of the previous example appears to have gone wrong. Why? - -(((backtracking)))(((greed)))(((regular expression)))The `[^]*` part of -the expression, as I described in the section on backtracking, will -first match as much as it can. If that causes the next part of the -pattern to fail, the matcher moves back one character and tries again -from there. In the example, the matcher first tries to match the whole -rest of the string and then moves back from there. It will find an -occurrence of `*/` after going back four characters and match that. -This is not what we wanted—the intention was to match a single -comment, not to go all the way to the end of the code and find the end -of the last block comment. - -Because of this behavior, we say the repetition operators (`+`, `*`, -`?`, and `{}`) are _((greed))y_, meaning they match as much as they -can and backtrack from there. If you put a ((question mark)) after -them (`+?`, `*?`, `??`, `{}?`), they become nongreedy and start by -matching as little as possible, matching more only when the remaining -pattern does not fit the smaller match. - -And that is exactly what we want in this case. By having the star -match the smallest stretch of characters that brings us to a `*/`, -we consume one block comment and nothing more. - -// test: wrap - -[source,javascript] ----- -function stripComments(code) { - return code.replace(/\/\/.*|\/\*[^]*?\*\//g, ""); -} -console.log(stripComments("1 /* a */+/* b */ 1")); -// → 1 + 1 ----- - -A lot of ((bug))s in ((regular expression)) programs can be traced to -unintentionally using a greedy operator where a nongreedy one would -work better. When using a ((repetition)) operator, consider the -nongreedy variant first. 
- -== Dynamically creating RegExp objects == - -(((regular expression,creation)))(((underscore character)))(((RegExp -constructor)))There are cases where you might not know the exact -((pattern)) you need to match against when you are writing your code. -Say you want to look for the user's name in a piece of text and -enclose it in underscore characters to make it stand out. Since you -will know the name only once the program is actually running, you -can't use the slash-based notation. - -But you can build up a string and use the `RegExp` ((constructor)) on -that. Here's an example: - -[source,javascript] ----- -var name = "harry"; -var text = "Harry is a suspicious character."; -var regexp = new RegExp("\\b(" + name + ")\\b", "gi"); -console.log(text.replace(regexp, "_$1_")); -// → _Harry_ is a suspicious character. ----- - -(((regular expression,flags)))(((backslash character)))When creating -the `\b` ((boundary)) markers, we have to use two backslashes because -we are writing them in a normal string, not a slash-enclosed regular -expression. The second argument to the `RegExp` constructor contains -the options for the regular expression—in this case `"gi"` for global -and case-insensitive. - -But what if the name is `"dea+hl[]rd"` because our user is a ((nerd))y -teenager? That would result in a nonsensical regular expression, which -won't actually match the user's name. - -(((backslash character)))(((escaping,in regexps)))(((regular -expression,escaping)))To work around this, we can add backslashes -before any character that we don't trust. Adding backslashes before -alphabetic characters is a bad idea because things like `\b` and `\n` -have a special meaning. But escaping everything that's not -alphanumeric or ((whitespace)) is safe. 
- -[source,javascript] ----- -var name = "dea+hl[]rd"; -var text = "This dea+hl[]rd guy is super annoying."; -var escaped = name.replace(/[^\w\s]/g, "\\$&"); -var regexp = new RegExp("\\b(" + escaped + ")\\b", "gi"); -console.log(text.replace(regexp, "_$1_")); -// → This _dea+hl[]rd_ guy is super annoying. ----- - -== The search method == - -(((searching)))(((regular expression,methods)))(((indexOf -method)))(((search method)))The `indexOf` method on strings cannot be -called with a regular expression. But there is another method, -`search`, which does expect a regular expression. Like `indexOf`, it -returns the first index on which the expression was found, or -1 when -it wasn't found. - -[source,javascript] ----- -console.log(" word".search(/\S/)); -// → 2 -console.log(" ".search(/\S/)); -// → -1 ----- - -Unfortunately, there is no way to indicate that the match should start -at a given offset (like we can with the second argument to `indexOf`), -which would often be useful. - -== The lastIndex property == - -(((exec method)))(((regular expression)))The `exec` method similarly -does not provide a convenient way to start searching from a given -position in the string. But it does provide an __in__convenient way. - -(((regular expression,matching)))(((matching)))(((source -property)))(((lastIndex property)))Regular expression objects have -properties. One such property is `source`, which contains the string -that expression was created from. Another property is `lastIndex`, -which controls, in some limited circumstances, where the next match -will start. - -(((interface,design)))(((exec method)))(((regular -expression,global)))Those circumstances are that the regular -expression must have the global (`g`) option enabled, and the match -must happen through the `exec` method. Again, a more sane solution -would have been to just allow an extra argument to be passed to -`exec`, but sanity is not a defining characteristic of JavaScript's -regular expression interface. 
- -[source,javascript] ----- -var pattern = /y/g; -pattern.lastIndex = 3; -var match = pattern.exec("xyzzy"); -console.log(match.index); -// → 4 -console.log(pattern.lastIndex); -// → 5 ----- - -(((side effect)))(((lastIndex property)))If the match was successful, -the call to `exec` automatically updates the `lastIndex` property to -point after the match. If no match was found, `lastIndex` is set back -to zero, which is also the value it has in a newly constructed regular -expression object. - -(((bug)))When using a global regular expression value for multiple -`exec` calls, these automatic updates to the `lastIndex` property can -cause problems. Your regular expression might be accidentally starting -at an index that was left over from a previous call. - -[source,javascript] ----- -var digit = /\d/g; -console.log(digit.exec("here it is: 1")); -// → ["1"] -console.log(digit.exec("and now: 1")); -// → null ----- - -(((regular expression,global)))(((match method)))Another interesting -effect of the global option is that it changes the way the `match` -method on strings works. When called with a global expression, instead -of returning an array similar to that returned by `exec`, `match` will -find _all_ matches of the pattern in the string and return an array -containing the matched strings. - -[source,javascript] ----- -console.log("Banana".match(/an/g)); -// → ["an", "an"] ----- - -So be cautious with global regular expressions. The cases where they -are necessary—calls to `replace` and places where you want to -explicitly use ++lastIndex++—are typically the only places where you -want to use them. - -=== Looping over matches === - -(((lastIndex property)))(((exec method)))(((loop)))A common pattern is -to scan through all occurrences of a pattern in a string, in a way -that gives us access to the match object in the loop body, by using -`lastIndex` and `exec`. - -[source,javascript] ----- -var input = "A string with 3 numbers in it... 
42 and 88."; -var number = /\b(\d+)\b/g; -var match; -while (match = number.exec(input)) - console.log("Found", match[1], "at", match.index); -// → Found 3 at 14 -// Found 42 at 33 -// Found 88 at 40 ----- - -(((while loop)))(((= operator)))This makes use of the fact that the -value of an ((assignment)) expression (`=`) is the assigned value. So -by using `match = number.exec(input)` as the condition in the `while` -statement, we perform the match at the start of each iteration, save -its result in a ((variable)), and stop looping when no more matches -are found. - -[[ini]] -== Parsing an INI file == - -(((comment)))(((file format)))(((enemies example)))(((ini file)))To -conclude the chapter, we'll look at a problem that calls for ((regular -expression))s. Imagine we are writing a program to automatically -harvest information about our enemies from the ((Internet)). (We will -not actually write that program here, just the part that reads the -((configuration)) file. Sorry to disappoint.) The configuration file -looks like this: - -[source,text/plain] ----- -searchengine=http://www.google.com/search?q=$1 -spitefulness=9.7 - -; comments are preceded by a semicolon... -; each section concerns an individual enemy -[larry] -fullname=Larry Doe -type=kindergarten bully -website=http://www.geocities.com/CapeCanaveral/11451 - -[gargamel] -fullname=Gargamel -type=evil sorcerer -outputdir=/home/marijn/enemies/gargamel ----- - -(((grammar)))The exact rules for this format (which is actually a -widely used format, usually called an _INI_ file) are as follows: - -- Blank lines and lines starting with semicolons are ignored. - -- Lines wrapped in `[` and `]` start a new ((section)). - -- Lines containing an alphanumeric identifier followed by an `=` - character add a setting to the current section. - -- Anything else is invalid. - -Our task is to convert a string like this into an array of objects, -each with a `name` property and an array of settings. 
We'll need one -such object for each section and one for the global settings at the -top. - -(((carriage return)))(((line break)))(((newline character)))Since the -format has to be processed ((line)) by line, splitting up the file -into separate lines is a good start. We used `string.split("\n")` to -do this in link:06_object.html#split[Chapter 6]. Some operating -systems, however, use not just a newline character to separate lines -but a carriage return character followed by a newline (`"\r\n"`). -Given that the `split` method also allows a regular expression as its -argument, we can split on a regular expression like `/\r?\n/` to split -in a way that allows both `"\n"` and `"\r\n"` between lines. - -[source,javascript] ----- -function parseINI(string) { - // Start with an object to hold the top-level fields - var currentSection = {name: null, fields: []}; - var categories = [currentSection]; - - string.split(/\r?\n/).forEach(function(line) { - var match; - if (/^\s*(;.*)?$/.test(line)) { - return; - } else if (match = line.match(/^\[(.*)\]$/)) { - currentSection = {name: match[1], fields: []}; - categories.push(currentSection); - } else if (match = line.match(/^(\w+)=(.*)$/)) { - currentSection.fields.push({name: match[1], - value: match[2]}); - } else { - throw new Error("Line '" + line + "' is invalid."); - } - }); - - return categories; -} ----- - -(((parseINI function)))(((parsing)))This code goes over every line in -the file, updating the “current section” object as it goes along. -First, it checks whether the line can be ignored, using the expression -`/^\s*(;.*)?$/`. Do you see how it works? The part between the -((parentheses)) will match comments, and the `?` will make sure it -also matches lines containing only whitespace. - -If the line is not a ((comment)), the code then checks whether the -line starts a new ((section)). If so, it creates a new current section -object, to which subsequent settings will be added. 
- -The last meaningful possibility is that the line is a normal setting, -which the code adds to the current section object. - -If a ((line)) matches none of these forms, the function throws an -error. - -(((caret character)))(((dollar sign)))(((boundary)))Note the recurring -use of `^` and `$` to make sure the expression matches the whole line, -not just part of it. Leaving these out results in code that mostly -works but behaves strangely for some input, which can be a difficult -bug to track down. - -(((if keyword)))(((assignment)))(((= operator)))The pattern `if (match -= string.match(...))` is similar to the trick of using an assignment -as the condition for `while`. You often aren't sure that your call to -`match` will succeed, so you can access the resulting object only -inside an `if` statement that tests for this. To not break the -pleasant chain of `if` forms, we assign the result of the match to a -variable and immediately use that assignment as the test in the `if` -statement. - -== International characters == - -(((internationalization)))(((Unicode)))(((regular -expression,internationalization)))Because of JavaScript's initial -simplistic implementation and the fact that this simplistic approach -was later set in stone as ((standard)) behavior, JavaScript's regular -expressions are rather dumb about characters that do not appear in the -English language. For example, as far as JavaScript's regular -expressions are concerned, a “((word character))” is only one of the -26 characters in the Latin alphabet (uppercase or lowercase), a decimal digit, or, for -some reason, the underscore character. Things like _é_ or _β_, which -most definitely are word characters, will not match `\w` (and _will_ -match uppercase `\W`, the nonword category).
- -(((whitespace)))By a strange historical accident, `\s` (whitespace) -does not have this problem and matches all characters that the -Unicode standard considers whitespace, including things like the -((nonbreaking space)) and the ((Mongolian vowel separator)). - -(((character category)))Some ((regular expression)) -((implementation))s in other programming languages have syntax to -match specific ((Unicode)) character categories, such as “all -uppercase letters”, “all punctuation”, or “control characters”. There -are plans to add support for such categories to JavaScript, but it -unfortunately looks like they won't be realized in the near ((future)). - -[[summary_regexp]] -== Summary == - -Regular expressions are objects that represent patterns in strings. -They use their own syntax to express these patterns. - -[cols="1,5"] -|==== -|`/abc/` |A sequence of characters -|`/[abc]/` |Any character from a set of characters -|`/[^abc]/` |Any character _not_ in a set of characters -|`/[0-9]/` |Any character in a range of characters -|`/x+/` |One or more occurrences of the pattern `x` -|`/x+?/` |One or more occurrences, nongreedy -|`/x*/` |Zero or more occurrences -|`/x?/` |Zero or one occurrence -|`/x{2,4}/` |Between two and four occurrences -|`/(abc)/` |A group -|++/a{brvbar}b{brvbar}c/++ |Any one of several patterns -|`/\d/` |Any digit character -|`/\w/` |An alphanumeric character (“word character”) -|`/\s/` |Any whitespace character -|`/./` |Any character except newlines -|`/\b/` |A word boundary -|`/^/` |Start of input -|`/$/` |End of input -|==== - -A regular expression has a method `test` to test whether a given -string matches it. It also has an `exec` method that, when a match is -found, returns an array containing all matched groups. Such an array -has an `index` property that indicates where the match started. 
- -Strings have a `match` method to match them against a regular -expression and a `search` method to search for one, returning only the -starting position of the match. Their `replace` method can replace -matches of a pattern with a replacement string. Alternatively, you can -pass a function to `replace`, which will be used to build up a -replacement string based on the match text and matched groups. - -Regular expressions can have options, which are written after -the closing slash. The `i` option makes the match case insensitive, -while the `g` option makes the expression _global_, which, among other -things, causes the `replace` method to replace all instances instead -of just the first. - -The `RegExp` constructor can be used to create a regular expression -value from a string. - -Regular expressions are a sharp ((tool)) with an awkward handle. They -simplify some tasks tremendously but can quickly become unmanageable -when applied to complex problems. Part of knowing how to use them is -resisting the urge to try to shoehorn things that they cannot sanely -express into them. - -== Exercises == - -(((debugging)))(((bug)))It is almost unavoidable that, in the course -of working on these exercises, you will get confused and frustrated by -some regular expression's inexplicable ((behavior)). Sometimes it -helps to enter your expression into an online tool like -https://www.debuggex.com/[_debuggex.com_] to see whether its -visualization corresponds to what you intended and to ((experiment)) -with the way it responds to various input strings. - -=== Regexp golf === - -(((program size)))(((code golf)))(((regexp golf (exercise))))_Code -golf_ is a term used for the game of trying to express a particular -program in as few characters as possible. Similarly, _regexp golf_ is -the practice of writing as tiny a regular expression as possible to -match a given pattern, and _only_ that pattern. 
- -(((boundary)))(((matching)))For each of the following items, write a ((regular -expression)) to test whether any of the given substrings occur in a -string. The regular expression should match only strings containing -one of the substrings described. Do not worry about word boundaries -unless explicitly mentioned. When your expression works, see whether you -can make it any smaller. - - 1. _car_ and _cat_ - 2. _pop_ and _prop_ - 3. _ferret_, _ferry_, and _ferrari_ - 4. Any word ending in _ious_ - 5. A whitespace character followed by a dot, comma, colon, or semicolon - 6. A word longer than six letters - 7. A word without the letter _e_ - -Refer to the table in the -link:09_regexp.html#summary_regexp[chapter summary] for help. Test each -solution with a few test strings. - -ifdef::interactive_target[] -[source,javascript] ----- -// Fill in the regular expressions - -verify(/.../, - ["my car", "bad cats"], - ["camper", "high art"]); - -verify(/.../, - ["pop culture", "mad props"], - ["plop"]); - -verify(/.../, - ["ferret", "ferry", "ferrari"], - ["ferrum", "transfer A"]); - -verify(/.../, - ["how delicious", "spacious room"], - ["ruinous", "consciousness"]); - -verify(/.../, - ["bad punctuation ."], - ["escape the dot"]); - -verify(/.../, - ["hottentottententen"], - ["no", "hotten totten tenten"]); - -verify(/.../, - ["red platypus", "wobbling nest"], - ["earth bed", "learning ape"]); - - -function verify(regexp, yes, no) { - // Ignore unfinished exercises - if (regexp.source == "...") return; - yes.forEach(function(s) { - if (!regexp.test(s)) - console.log("Failure to match '" + s + "'"); - }); - no.forEach(function(s) { - if (regexp.test(s)) - console.log("Unexpected match for '" + s + "'"); - }); -} ----- -endif::interactive_target[] - -=== Quoting style === - -(((quoting style (exercise))))(((single-quote -character)))(((double-quote character)))Imagine you have written a -story and used single ((quotation mark))s throughout to mark pieces -of dialogue. 
Now you want to replace all the dialogue quotes with -double quotes, while keeping the single quotes used in contractions -like _aren't_. - -(((replace method)))Think of a pattern that distinguishes these two -kinds of quote usage and craft a call to the `replace` method that -does the proper replacement. - -ifdef::interactive_target[] -// test: no - -[source,javascript] ----- -var text = "'I'm the cook,' he said, 'it's my job.'"; -// Change this call. -console.log(text.replace(/A/g, "B")); -// → "I'm the cook," he said, "it's my job." ----- -endif::interactive_target[] - -!!hint!! - -(((quoting style (exercise))))(((boundary)))The most obvious solution -is to only replace quotes with a nonword character on at least one -side. Something like `/\W'|'\W/`. But you also have to take the start -and end of the line into account. - -(((grouping)))(((replace method)))In addition, you must ensure that -the replacement also includes the characters that were matched by the -`\W` pattern so that those are not dropped. This can be done by -wrapping them in ((parentheses)) and including their groups in the -replacement string (`$1`, `$2`). Groups that are not matched will be -replaced by nothing. - -!!hint!! - -=== Numbers again === - -(((number)))A series of ((digit))s can be matched by the simple -regular expression `/\d+/`. - -(((sign)))(((fractional number)))(((syntax)))(((minus)))(((plus -character)))(((exponent)))(((scientific notation)))(((period -character)))Write an expression that matches only JavaScript-style -numbers. It must support an optional minus _or_ plus sign in front of -the number, the decimal dot, and exponent notation—`5e-3` or ++1E10++— -again with an optional sign in front of the exponent. Also note that -it is not necessary for there to be digits in front of or after the -dot, but the number cannot be a dot alone. That is, `.5` and `5.` -are valid JavaScript numbers, but a lone dot _isn't_. 
- -ifdef::interactive_target[] -// test: no - -[source,javascript] ----- -// Fill in this regular expression. -var number = /^...$/; - -// Tests: -["1", "-1", "+15", "1.55", ".5", "5.", "1.3e2", "1E-4", - "1e+12"].forEach(function(s) { - if (!number.test(s)) - console.log("Failed to match '" + s + "'"); -}); -["1a", "+-1", "1.2.3", "1+1", "1e4.5", ".5.", "1f5", - "."].forEach(function(s) { - if (number.test(s)) - console.log("Incorrectly accepted '" + s + "'"); -}); ----- -endif::interactive_target[] - -!!hint!! - -(((regular expression,escaping)))(((backslash character)))First, do -not forget the backslash in front of the dot. - -Matching the optional ((sign)) in front of the ((number)), as well as -in front of the ((exponent)), can be done with `[+\-]?` or `(\+|-|)` -(plus, minus, or nothing). - -(((pipe character)))The more complicated part of the exercise is the -problem of matching both `"5."` and `".5"` without also matching -`"."`. For this, a good solution is to use the `|` operator to -separate the two cases—either one or more digits optionally followed -by a dot and zero or more digits _or_ a dot followed by one or more -digits. - -(((exponent)))(((case sensitivity)))(((regular -expression,flags)))Finally, to make the _e_ case-insensitive, either -add an `i` option to the regular expression or use `[eE]`. - -!!hint!! diff --git a/10_modules.txt b/10_modules.txt deleted file mode 100644 index a43db2965..000000000 --- a/10_modules.txt +++ /dev/null @@ -1,931 +0,0 @@ -:chap_num: 10 -:prev_link: 09_regexp -:next_link: 11_language -:load_files: ["code/chapter/10_modules.js", "code/loadfile.js"] - -= Modules = - -ifdef::interactive_target[] - -[chapterquote="true"] -[quote, Master Yuan-Ma, The Book of Programming] -____ -A beginning programmer writes her programs like an ant builds her -hill, one piece at a time, without thought for the bigger structure. -Her programs will be like loose sand. They may stand for a while, but -growing too big they fall apart. 
- -Realizing this problem, the programmer will start to spend a lot of -time thinking about structure. Her programs will be rigidly -structured, like rock sculptures. They are solid, but when they must -change, violence must be done to them. - -The master programmer knows when to apply structure and when to leave -things in their simple form. Her programs are like clay, solid yet -malleable. -____ - -endif::interactive_target[] - -(((organization)))(((code structure)))Every program has a shape. On -a small scale, this shape is determined by its division into -((function))s and the blocks inside those functions. Programmers have -a lot of freedom in the way they structure their programs. Shape follows -more from the ((taste)) of the programmer than from the program's -intended functionality. - -(((readability)))When looking at a larger program in its entirety, -individual functions start to blend into the background. Such a -program can be made more readable if we have a larger unit of -organization. - -_Modules_ divide programs into clusters of code that, by _some_ -criterion, belong together. This chapter explores some of the benefits -that such division provides and shows techniques for building -((module))s in JavaScript. - -== Why modules help == - -(((book analogy)))(((organization)))There are a number of reasons why -authors divide their books into ((chapter))s and sections. These -divisions make it easier for a reader to see how the book is built up -and to find specific parts that they are interested in. They also help -the _author_ by providing a clear focus for every section. - -The benefits of organizing a program into several ((file))s or -((module))s are similar. Structure helps people who aren't yet -familiar with the code find what they are looking for and makes it -easier for the programmer to keep things that are related -close together. 
- -(((project chapter)))(((readability)))(((interconnection)))Some -programs are even organized along the model of a traditional ((text)), -with a well-defined order in which the reader is encouraged to go -through the program and with lots of prose (comments) providing a coherent -description of the code. This makes reading the program a lot less -intimidating—reading unknown code is usually intimidating—but has the -downside of being more work to set up. It also makes the program more -difficult to change because prose tends to be more tightly -interconnected than code. This style is called _((literate -programming))_. The “project” chapters of this book can be considered -literate programs. - -(((minimalism)))(((evolution)))(((structure)))(((organization)))As a -general rule, structuring things costs energy. In the early stages of -a project, when you are not quite sure yet what goes where or what -kind of ((module))s the program needs at all, I endorse a minimalist, -structureless attitude. Just put everything wherever it is convenient -to put it until the code stabilizes. That way, you won't be wasting -time moving pieces of the program back and forth, and you won't -accidentally lock yourself into a structure that does not actually fit -your program. - -=== Namespacing === - -(((encapsulation)))(((isolation)))(((global -scope)))(((local scope)))Most modern ((programming language))s have a -((scope)) level between _global_ (everyone can see it) and _local_ -(only this function can see it). JavaScript does not. Thus, by -default, everything that needs to be visible outside of the scope of a -top-level function is visible _everywhere_. 
- -(((namespace pollution)))Namespace pollution, the problem of a lot of -unrelated code having to share a single set of global variable names, -was mentioned in link:04_data.html#namespace_pollution[Chapter 4], -where the `Math` object was given as an example of an object that acts -like a module by grouping math-related functionality. - -(((function,as namespace)))Though JavaScript provides no actual -((module)) construct yet, objects can be used to create publicly -accessible sub((namespace))s, and functions can be used to create an -isolated, private namespace inside of a module. Later in this chapter, -I will discuss a way to build reasonably convenient, namespace-isolating -modules on top of the primitive concepts that JavaScript gives us. - -=== Reuse === - -(((version control)))(((bug)))(((copy-paste programming)))(((ini -file)))(((dependency)))(((structure)))In a “flat” project, which isn't -structured as a set of ((module))s, it is not apparent which parts of -the code are needed to use a particular function. In my program for -spying on my enemies (see link:09_regexp.html#ini[Chapter 9]), I wrote -a function for reading configuration files. If I want to use that -function in another project, I must go and copy out the parts of the -old program that look like they are relevant to the functionality that -I need and paste them into my new program. Then, if I find a mistake -in that code, I'll fix it only in whichever program that I'm working -with at the time and forget to also fix it in the other program. - -(((duplication)))Once you have lots of such shared, duplicated pieces -of code, you will find yourself wasting a lot of time and energy on -moving them around and keeping them up-to-date. - -(((reuse)))Putting pieces of functionality that stand on their own -into separate files and modules makes them easier to track, update, -and share because all the various pieces of code that want to use the -module load it from the same actual file. 
- -(((dependency)))(((library)))(((installation)))(((upgrading)))This -idea gets even more powerful when the relations between modules—which -other modules each module depends on—are explicitly stated. You can -then automate the process of installing and upgrading external modules -(_libraries_). - -(((package manager)))(((download)))(((reuse)))Taking this idea even -further, imagine an online service that tracks and distributes -hundreds of thousands of such libraries, allowing you to search for -the functionality you need and, once you find it, set up your project -to automatically download it. - -[[modules_npm]] -(((NPM)))This service exists. It is called NPM -(http://npmjs.org[_npmjs.org_]). NPM consists of an online database of -modules and a tool for downloading and upgrading the modules your -program depends on. It grew out of ((Node.js)), the browserless -JavaScript environment we will discuss in -link:20_node.html#node[Chapter 20], but can also be useful when -programming for the browser. - -=== Decoupling === - -(((isolation)))(((decoupling)))(((backward -compatibility)))Another important role of modules is isolating pieces -of code from each other, in the same way that the object interfaces -from link:06_object.html#interface[Chapter 6] do. A well-designed -module will provide an interface for external code to use. As the -module gets updated with ((bug)) fixes and new functionality, the -existing ((interface)) stays the same (it is _stable_) so that other -modules can use the new, improved version without any changes to -themselves. - -(((stability)))Note that a stable interface does not mean no new -functions, methods, or variables are added. It just means that -existing functionality isn't removed and its meaning is not changed. - -(((implementation detail)))(((encapsulation)))A good ((module)) -((interface)) should allow the module to grow without breaking the old -interface. 
This means exposing as few of the module's internal -concepts as possible while also making the “language” that the -interface exposes powerful and flexible enough to be applicable in a -wide range of situations. - -(((interface,design)))For interfaces that expose a single, focused -concept, such as a configuration file reader, this design comes -naturally. For others, such as a text editor, which has many different -aspects that external code might need to access (content, styling, -user actions, and so on), it requires careful design. - -== Using functions as namespaces == - -(((namespace)))(((function,as namespace)))Functions are the only things in -JavaScript that create a new ((scope)). So if we want our ((module))s -to have their own scope, we will have to base them on functions. - -(((weekday example)))(((Date type)))(((getDay method)))Consider this -trivial module for associating names with day-of-the-week numbers, as -returned by a `Date` object's `getDay` method: - -[source,javascript] ----- -var names = ["Sunday", "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday"]; -function dayName(number) { - return names[number]; -} - -console.log(dayName(1)); -// → Monday ----- - -(((access control)))(((encapsulation)))The `dayName` function is part -of the module's ((interface)), but the `names` variable is not. We -would prefer _not_ to spill it into the ((global scope)). - -We can do this: - -[source,javascript] ----- -var dayName = function() { - var names = ["Sunday", "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday"]; - return function(number) { - return names[number]; - }; -}(); - -console.log(dayName(3)); -// → Wednesday ----- - -(((anonymous function)))Now `names` is a local variable in an -(unnamed) function. This function is created and immediately called, -and its return value (the actual `dayName` function) is stored in a -variable. 
We could have pages and pages of code in this function, with -100 local variables, and they would all be internal to our -module—visible to the module itself but not to outside code. - -(((isolation)))(((side effect)))We can use a similar pattern to -isolate code from the outside world entirely. The following module logs a -value to the console but does not actually provide any values for -other modules to use: - -[source,javascript] ----- -(function() { - function square(x) { return x * x; } - var hundred = 100; - - console.log(square(hundred)); -})(); -// → 10000 ----- - -(((namespace pollution)))This code simply outputs the square of 100, -but in the real world it could be a module that adds a method -to some ((prototype)) or sets up a widget on a web page. It is -wrapped in a function to prevent the variables it uses internally from -polluting the ((global scope)). - -(((parsing)))(((function keyword)))Why did we wrap the namespace -function in a pair of ((parentheses))? This has to do with a quirk in -JavaScript's ((syntax)). If an _((expression))_ starts with the -keyword `function`, it is a function expression. However, if a -_((statement))_ starts with `function`, it is a function -_declaration_, which requires a name and, not being an expression, -cannot be called by writing parentheses after it. You can think of the -extra wrapping parentheses as a trick to force the function to be -interpreted as an expression. - -== Objects as interfaces == - -(((interface)))Now imagine that we want to add another function to our -day-of-the-week module, one that goes from a day name to a -number. We can't simply return the function anymore but must wrap the -two functions in an object. 
- -[source,javascript] ----- -var weekDay = function() { - var names = ["Sunday", "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday"]; - return { - name: function(number) { return names[number]; }, - number: function(name) { return names.indexOf(name); } - }; -}(); - -console.log(weekDay.name(weekDay.number("Sunday"))); -// → Sunday ----- - -(((exporting)))(((exports object)))(((this)))For bigger ((module))s, -gathering all the _exported_ values into an object at the end of the -function becomes awkward since many of the exported functions are -likely to be big and you'd prefer to write them somewhere else, near -related internal code. A convenient alternative is to declare an -object (conventionally named `exports`) and add properties to that -whenever we are defining something that needs to be exported. In the -following example, the module function takes its interface object as -an argument, allowing code outside of the function to create it and store -it in a variable. (Outside of a function, `this` refers to the global -scope object.) - -[source,javascript] ----- -(function(exports) { - var names = ["Sunday", "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday"]; - - exports.name = function(number) { - return names[number]; - }; - exports.number = function(name) { - return names.indexOf(name); - }; -})(this.weekDay = {}); - -console.log(weekDay.name(weekDay.number("Saturday"))); -// → Saturday ----- - -== Detaching from the global scope == - -(((variable,global)))The previous pattern is commonly used by JavaScript -modules intended for the ((browser)). The module will claim a single -global variable and wrap its code in a function in order to have its -own private ((namespace)). But this pattern still causes problems if -multiple modules happen to claim the same name or if you want to load -two ((version))s of a module alongside each other. 
- -(((module loader)))(((require -function)))(((CommonJS)))(((dependency)))With a little plumbing, we -can create a system that allows one ((module)) to directly ask for the -((interface)) object of another module, without going through the -global scope. Our goal is a `require` function that, when given a -module name, will load that module's file (from disk or the Web, -depending on the platform we are running on) and return the -appropriate interface value. - -This approach solves the problems mentioned previously and has the added -benefit of making your program's dependencies explicit, making it -harder to accidentally make use of some module without stating that -you need it. - -(((readFile function)))(((require function)))For `require` we need two -things. First, we want a function `readFile`, which returns the -content of a given file as a string. (A single such function is not -present in ((standard)) JavaScript, but different JavaScript -environments, such as the browser and Node.js, provide their own ways -of accessing ((file))s. For now, let's just pretend we have this -function.) Second, we need to be able to actually execute this -string as JavaScript code. - -[[eval]] -== Evaluating data as code == - -(((evaluation)))(((interpretation)))There are several ways to take -data (a string of code) and run it as part of the current program. - -(((isolation)))(((eval)))The most obvious way is the special operator -`eval`, which will execute a string of code in the _current_ scope. -This is usually a bad idea because it breaks some of the sane -properties that scopes normally have, such as being isolated from the -outside world. - -[source,javascript] ----- -function evalAndReturnX(code) { - eval(code); - return x; -} - -console.log(evalAndReturnX("var x = 2")); -// → 2 ----- - -(((Function constructor)))A better way of interpreting data as code is -to use the `Function` constructor. 
This takes two arguments: a string -containing a comma-separated list of argument names and a string -containing the function's body. - -[source,javascript] ----- -var plusOne = new Function("n", "return n + 1;"); -console.log(plusOne(4)); -// → 5 ----- - -This is precisely what we need for our modules. We can wrap a module's -code in a function, with that function's scope becoming our module -((scope)). - -[[commonjs]] -== Require == - -(((require function)))(((CommonJS)))The following is a minimal -implementation of `require`: - -// test: wrap - -[source,javascript] ----- -function require(name) { - var code = new Function("exports", readFile(name)); - var exports = {}; - code(exports); - return exports; -} - -console.log(require("weekDay").name(1)); -// → Monday ----- - -(((weekday example)))(((exports object)))(((Function -constructor)))Since the `new Function` constructor wraps the module -code in a function, we don't have to write a wrapping ((namespace)) -function in the module file itself. And since we make `exports` an -argument to the module function, the module does not have to declare -it. This removes a lot of clutter from our example module. - -[source,javascript] ----- -var names = ["Sunday", "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday"]; - -exports.name = function(number) { - return names[number]; -}; -exports.number = function(name) { - return names.indexOf(name); -}; ----- - -(((require function)))When using this pattern, a ((module)) typically -starts with a few variable declarations that load the modules it -depends on. - -// test: no - -[source,javascript] ----- -var weekDay = require("weekDay"); -var today = require("today"); - -console.log(weekDay.name(today.dayNumber())); ----- - -(((efficiency)))The simplistic implementation of `require` given previously -has several problems. 
For one, it will load and run a module every -time it is ++require++d, so if several modules have the same -dependency or a `require` call is put inside a function that will -be called multiple times, time and energy will be wasted. - -(((cache)))This can be solved by storing the modules that have already -been loaded in an object and simply returning the existing value when -one is loaded multiple times. - -(((exports object)))(((exporting)))The second problem is that it is -not possible for a module to directly export a value other than the -`exports` object, such as a function. For example, a module might want -to export only the constructor of the object type it defines. Right -now, it cannot do that because `require` always uses the `exports` -object it creates as the exported value. - -(((module object)))The traditional solution for this is to provide -modules with another variable, `module`, which is an object that has a -property `exports`. This property initially points at the empty object -created by `require` but can be overwritten with another value in -order to export something else. - -// test: wrap -// include_code - -[source,javascript] ----- -function require(name) { - if (name in require.cache) - return require.cache[name]; - - var code = new Function("exports, module", readFile(name)); - var exports = {}, module = {exports: exports}; - code(exports, module); - - require.cache[name] = module.exports; - return module.exports; -} -require.cache = Object.create(null); ----- - -(((require function)))We now have a module system that uses a single -global variable (`require`) to allow modules to find and use each -other without going through the ((global scope)). - -This style of module system is called _((CommonJS)) modules_, after -the pseudo-((standard)) that first specified it. It is built into the -((Node.js)) system. Real implementations do a lot more than the -example I showed. 
Most importantly, they have a much more intelligent -way of going from a module name to an actual piece of code, allowing -both pathnames relative to the current file and module names that -point directly to locally installed modules. - -[[amd]] -== Slow-loading modules == - -(((loading)))(((synchronous I/O)))(((blocking)))(((World Wide -Web)))Though it is possible to use the CommonJS module style when -writing JavaScript for the ((browser)), it is somewhat involved. The -reason for this is that reading a file (module) from the Web is a lot -slower than reading it from the hard disk. While a script is running -in the browser, nothing else can happen to the website on which it -runs, for reasons that will become clear in -link:14_event.html#timeline[Chapter 14]. This means that if every -`require` call went and fetched something from some faraway web -server, the page would freeze for a painfully long time while loading -its scripts. - -(((Browserify)))(((require function)))(((preprocessing)))One way to -work around this problem is to run a program like -http://browserify.org[_Browserify_] on your code before you serve it -on a web page. This will look for calls to `require`, resolve all -dependencies, and gather the needed code into a single big file. -The website itself can simply load this file to get all the modules -it needs. - -(((AMD)))(((dependency)))(((asynchronous I/O)))Another solution is to wrap the -code that makes up your module in a function so that the ((module -loader)) can first load its dependencies in the background and then -call the function, initializing the ((module)), when the dependencies -have been loaded. That is what the Asynchronous Module Definition -(AMD) module system does. 
- -(((weekday example)))Our trivial program with dependencies would look -like this in AMD: - -// test: no - -[source,javascript] ----- -define(["weekDay", "today"], function(weekDay, today) { - console.log(weekDay.name(today.dayNumber())); -}); ----- - -(((define function)))(((asynchronous programming)))The `define` -function is central to this approach. It takes first an array of -module names and then a function that takes one argument for each -dependency. It will load the dependencies (if they haven't already -been loaded) in the background, allowing the page to continue working -while the files are being fetched. Once all dependencies are loaded, -`define` will call the function it was given, with the ((interface))s -of those dependencies as arguments. - -(((weekday example)))(((define function)))The modules that are loaded -this way must themselves contain a call to `define`. The value used as -their interface is whatever was returned by the function passed to -`define`. Here is the `weekDay` module again: - -[source,javascript] ----- -define([], function() { - var names = ["Sunday", "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday"]; - return { - name: function(number) { return names[number]; }, - number: function(name) { return names.indexOf(name); } - }; -}); ----- - -(((define function)))(((backgroundReadFile function)))To be -able to show a minimal implementation of `define`, we will pretend we -have a `backgroundReadFile` function that takes a filename and a -function and calls the function with the content of the file as -soon as it has finished loading it. (link:17_http.html#getURL[Chapter -17] will explain how to write that function.) - -For the purpose of keeping track of modules while they are being -loaded, the implementation of `define` will use objects that describe -the state of modules, telling us whether they are available yet and -providing their interface when they are. 
- -The `getModule` function, when given a name, will return such an -object and ensure that the module is scheduled to be loaded. It uses -a ((cache)) object to avoid loading the same module twice. - -// include_code - -[source,javascript] ----- -var defineCache = Object.create(null); -var currentMod = null; - -function getModule(name) { - if (name in defineCache) - return defineCache[name]; - - var module = {exports: null, - loaded: false, - onLoad: []}; - defineCache[name] = module; - backgroundReadFile(name, function(code) { - currentMod = module; - new Function("", code)(); - }); - return module; -} ----- - -(((define function)))We assume the loaded file also contains a -(single) call to `define`. The `currentMod` variable is used to tell -this call about the module object that is currently being loaded so -that it can update this object when it finishes loading. We will come -back to this mechanism in a moment. - -(((dependency)))(((Function constructor)))(((asynchronous -programming)))(((event handling)))The `define` function itself uses -`getModule` to fetch or create the module objects for the current -module's dependencies. Its task is to schedule the `moduleFunction` -(the function that contains the module's actual code) to be run -whenever those dependencies are loaded. For this purpose, it defines a -function `whenDepsLoaded` that is added to the `onLoad` array of all -dependencies that are not yet loaded. This function immediately -returns if there are still unloaded dependencies, so it will do -actual work only once, when the last dependency has finished loading. It is -also called immediately, from `define` itself, in case there are no -dependencies that need to be loaded. 
- -// include_code - -[source,javascript] ----- -function define(depNames, moduleFunction) { - var myMod = currentMod; - var deps = depNames.map(getModule); - - deps.forEach(function(mod) { - if (!mod.loaded) - mod.onLoad.push(whenDepsLoaded); - }); - - function whenDepsLoaded() { - if (!deps.every(function(m) { return m.loaded; })) - return; - - var args = deps.map(function(m) { return m.exports; }); - var exports = moduleFunction.apply(null, args); - if (myMod) { - myMod.exports = exports; - myMod.loaded = true; - myMod.onLoad.forEach(function(f) { f(); }); - } - } - whenDepsLoaded(); -} ----- - -(((define function)))When all dependencies are available, -`whenDepsLoaded` calls the function that holds the module, giving it -the dependencies’ interfaces as arguments. - -The first thing `define` does is store the value that `currentMod` had -when it was called in a variable `myMod`. Remember that `getModule`, -just before evaluating the code for a module, stored the corresponding -module object in `currentMod`. This allows `whenDepsLoaded` to store -the return value of the module function in that module's `exports` -property, set the module's `loaded` property to true, and call all the -functions that are waiting for the module to load. - -(((asynchronous programming)))This code is a lot harder to follow than -the `require` function. Its execution does not follow a simple, -predictable path. Instead, multiple operations are set up to happen at -some unspecified time in the ((future)), which obscures the way the -code executes. - -A real ((AMD)) implementation is, again, quite a lot more clever about -resolving module names to actual URLs and generally more robust than -the one shown previously. The _((RequireJS))_ (http://requirejs.org[_requirejs.org_]) project provides -a popular implementation of this style of ((module loader)). 
- -== Interface design == - -(((interface,design)))Designing interfaces for modules and object -types is one of the subtler aspects of programming. Any nontrivial -piece of functionality can be modeled in various ways. Finding a way that -works well requires insight and foresight. - -The best way to learn the value of good interface design is to use -lots of interfaces—some good, some bad. Experience will teach -you what works and what doesn't. Never assume that a painful interface -is “just the way it is”. Fix it, or wrap it in a new interface that -works better for you. - -=== Predictability === - -(((documentation)))(((predictability)))(((convention)))If programmers -can predict the way your interface works, they (or you) won't get -sidetracked as often by the need to look up how to use it. Thus, try -to follow conventions. When there is another module or part of the -standard JavaScript environment that does something similar to what -you are implementing, it might be a good idea to make your interface -resemble the existing interface. That way, it'll feel familiar to -people who know the existing interface. - -(((cleverness)))Another area where predictability is important is the -actual _behavior_ of your code. It can be tempting to make an -unnecessarily clever interface with the justification that it's more -convenient to use. For example, you could accept all kinds of -different types and combinations of arguments and do the “right -thing” for all of them. Or you could provide dozens of specialized -convenience functions that provide slightly different flavors of your -module's functionality. These might make code that builds on your -interface slightly shorter, but they will also make it much harder for -people to build a clear ((mental model)) of the module's behavior. - -=== Composability === - -(((composability)))In your interfaces, try to use the simplest ((data -structure))s possible and make functions do a single, clear thing. 
-Whenever practical, make them ((pure function))s (see -link:03_functions.html#pure[Chapter 3]). - -(((array-like object)))For example, it is not uncommon for modules to -provide their own array-like collection objects, with their own -interface for counting and extracting elements. Such objects won't -have `map` or `forEach` methods, and any existing function that -expects a real array won't be able to work with them. This is an -example of poor __composability__—the module cannot be easily composed -with other code. - -(((encapsulation)))(((spell-check example)))One example would be a -module for spell-checking text, which we might need when we want to -write a text editor. The spell-checker could be made to operate -directly on whichever complicated ((data structure))s the editor uses -and directly call internal functions in the editor to have the user -choose between spelling suggestions. If we go that way, the module -cannot be used with any other programs. On the other hand, if we -define the spell-checking interface so that you can pass it a simple -string and it will return the position in the string where it found a -possible misspelling, along with an array of suggested corrections, -then we have an interface that could also be composed with other -systems because strings and arrays are always available in -JavaScript. - -=== Layered interfaces === - -(((simplicity)))(((complexity)))(((layering)))(((interface -design)))When designing an interface for a complex piece of -functionality—sending email, for example—you often run into a dilemma. -On the one hand, you do not want to overload the user of your -interface with details. They shouldn't have to study your interface -for 20 minutes before they can send an email. On the other hand, you -do not want to hide all the details either—when people need to do -complicated things with your module, they should be able to. 
- -Often the solution is to provide two interfaces: a detailed -_low-level_ one for complex situations and a simple _high-level_ one -for routine use. The second can usually be built easily using the -tools provided by the first. In the email module, the high-level -interface could just be a function that takes a message, a sender -address, and a receiver address and then sends the email. The low-level -interface would allow full control over email headers, attachments, -HTML mail, and so on. - -== Summary == - -Modules provide structure to bigger programs by separating the code -into different files and namespaces. Giving these modules well-defined -interfaces makes them easier to use and reuse -and makes it possible to continue using them as the module -itself evolves. - -Though the JavaScript language is characteristically unhelpful -when it comes to modules, the flexible functions and objects it -provides make it possible to define rather nice module systems. -Function scopes can be used as internal namespaces for the module, and -objects can be used to store sets of exported values. - -There are two popular, well-defined approaches to such modules. One is -called _CommonJS Modules_ and revolves around a `require` function -that fetches a module by name and returns its interface. The other is -called _AMD_ and uses a `define` function that takes an array of -module names and a function and, after loading the modules, runs the -function with their interfaces as arguments. - -== Exercises == - -=== Month names === - -(((Date type)))(((weekday example)))(((month name (exercise))))Write a -simple module similar to the `weekDay` module that can convert month -numbers (zero-based, as in the `Date` type) to names and can convert names back -to numbers. Give it its own namespace since it will need an internal -array of month names, and use plain JavaScript, without any module -loader system. 
- -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. - -console.log(month.name(2)); -// → March -console.log(month.number("November")); -// → 10 ----- -endif::interactive_target[] - -!!hint!! - -(((month name (exercise))))This follows the `weekDay` module almost -exactly. A function expression, called immediately, wraps the variable -that holds the array of names, along with the two functions that must -be exported. The functions are put in an object and returned. The -returned interface object is stored in the `month` variable. - -!!hint!! - -=== A return to electronic life === - -(((electronic life)))(((module)))Hoping that -link:07_elife.html#elife[Chapter 7] is still somewhat fresh in your -mind, think back to the system designed in that chapter and come up -with a way to separate the code into modules. To refresh your memory, -these are the functions and types defined in that chapter, in order of -appearance: - ----- -Vector -Grid -directions -directionNames -randomElement -BouncingCritter -elementFromChar -World -charFromElement -Wall -View -WallFollower -dirPlus -LifelikeWorld -Plant -PlantEater -SmartPlantEater -Tiger ----- - -(((book analogy)))Don't exaggerate and create too many modules. A book -that starts a new chapter for every page would probably get on your -nerves, if only because of all the space wasted on titles. Similarly, -having to open 10 files to read a tiny project isn't helpful. Aim for -three to five modules. - -(((encapsulation)))You can choose to have some functions become -internal to their module and thus inaccessible to other modules. - -There is no single correct solution here. Module organization is -largely a matter of ((taste)). - -!!hint!! - -Here is what I came up with. I've put parentheses around internal -functions. 
- ----- -Module "grid" - Vector - Grid - directions - directionNames - -Module "world" - (randomElement) - (elementFromChar) - (charFromElement) - View - World - LifelikeWorld - directions [reexported] - -Module "simple_ecosystem" - (randomElement) [duplicated] - (dirPlus) - Wall - BouncingCritter - WallFollower - -Module "ecosystem" - Wall [duplicated] - Plant - PlantEater - SmartPlantEater - Tiger ----- - -(((exporting)))I have reexported the `directions` array from the -`grid` module from `world` so that modules built on that (the -ecosystems) don't have to know or worry about the existence of the -`grid` module. - -(((duplication)))I also duplicated two generic and tiny helper values -(`randomElement` and `Wall`) since they are used as internal details -in different contexts and do not belong in the interfaces for these -modules. - -!!hint!! - -=== Circular dependencies === - -(((dependency)))(((circular dependency)))(((require function)))A -tricky subject in dependency management is circular dependencies, -where module A depends on B, and B also depends on A. Many module -systems simply forbid this. ((CommonJS)) modules allow a limited form: -it works as long as the modules do not replace their default `exports` -object with another value and start accessing each other's -interface only after they finish loading. - -Can you think of a way in which support for this feature could be -implemented? Look back to the definition of `require` and consider -what the function would have to do to allow this. - -!!hint!! - -(((overriding)))(((circular dependency)))(((exports object)))The trick -is to add the `exports` object created for a module to `require`'s -((cache)) _before_ actually running the module. This means the module -will not yet have had a chance to override `module.exports`, so we do -not know whether it may want to export some other value. After -loading, the cache object is overridden with `module.exports`, which -may be a different value. 
- -But if in the course of loading the module, a second module is loaded -that asks for the first module, its default `exports` object, which is likely -still empty at this point, will be in the cache, and the second module -will receive a reference to it. If it doesn't try to do anything with -the object until the first module has finished loading, things will -work. - -!!hint!! diff --git a/11_language.txt b/11_language.txt deleted file mode 100644 index 4610f8310..000000000 --- a/11_language.txt +++ /dev/null @@ -1,881 +0,0 @@ -:chap_num: 11 -:prev_link: 10_modules -:next_link: 12_browser -:load_files: ["code/chapter/11_language.js"] -:zip: node/html - -= Project: A Programming Language = - -[chapterquote="true"] -[quote, Hal Abelson and Gerald Sussman, Structure and Interpretation of Computer Programs] -____ -The evaluator, which determines the meaning of expressions in a -programming language, is just another program. -____ - -ifdef::interactive_target[] - -[chapterquote="true"] -[quote, Master Yuan-Ma, The Book of Programming] -____ -When a student asked the master about the nature of the cycle of Data -and Control, Yuan-Ma replied ‘Think of a compiler, compiling itself.’ -____ - -endif::interactive_target[] - -(((Abelson+++,+++ Hal)))(((Sussman+++,+++ -Gerald)))(((SICP)))(((project chapter)))Building your own -((programming language)) is surprisingly easy (as long as you do not -aim too high) and very enlightening. - -The main thing I want to show in this chapter is that there is no -((magic)) involved in building your own language. I've often felt that -some human inventions were so immensely clever and complicated that -I'd never be able to understand them. But with a little reading and -tinkering, such things often turn out to be quite mundane. - -(((Egg language)))We will build a programming language called Egg. It -will be a tiny, simple language but one that is powerful enough to -express any computation you can think of. 
It will also allow simple -((abstraction)) based on ((function))s. - -[[parsing]] -== Parsing == - -(((parsing)))(((validation)))The most immediately visible part of a -programming language is its _((syntax))_, or notation. A _parser_ is a -program that reads a piece of text and produces a data structure that -reflects the structure of the program contained in that text. If the -text does not form a valid program, the parser should complain and -point out the error. - -(((special form)))Our language will have a simple and uniform -syntax. Everything in Egg is an ((expression)). An expression can be a -variable, a number, a string, or an _application_. Applications are -used for function calls but also for constructs such as `if` or `while`. - -(((double-quote character)))(((parsing)))(((escaping,in strings)))To -keep the parser simple, strings in Egg do not support anything like -backslash escapes. A string is simply a sequence of characters that -are not double quotes, wrapped in double quotes. A number is a -sequence of digits. Variable names can consist of any character that -is not ((whitespace)) and does not have a special meaning in the -syntax. - -(((comma character)))Applications are written the way they are in -JavaScript, by putting ((parentheses)) after an expression and having -any number of ((argument))s between those parentheses, separated by -commas. - ----- -do(define(x, 10), - if(>(x, 5), - print("large"), - print("small"))) ----- - -(((block)))The ((uniformity)) of the ((Egg language)) means that -things that are ((operator))s in JavaScript (such as `>`) are normal -variables in this language, applied just like other ((function))s. And -since the ((syntax)) has no concept of a block, we need a `do` -construct to represent doing multiple things in sequence. 
- -(((type property)))(((parsing)))The ((data structure)) that the parser will -use to describe a program will consist of ((expression)) objects, each -of which has a `type` property indicating the kind of expression it is -and other properties to describe its content. - -(((identifier)))Expressions of type `"value"` represent literal strings -or numbers. Their `value` property contains the string or number value -that they represent. Expressions of type `"word"` are used for -identifiers (names). Such objects have a `name` property that holds -the identifier's name as a string. Finally, `"apply"` expressions -represent applications. They have an `operator` property that refers -to the expression that is being applied, and they have an `args` property that -refers to an array of argument expressions. - -The `>(x, 5)` part of the previous program would be represented like this: - -[source,application/json] ----- -{ - type: "apply", - operator: {type: "word", name: ">"}, - args: [ - {type: "word", name: "x"}, - {type: "value", value: 5} - ] -} ----- - -indexsee:[abstract syntax tree,syntax tree] -Such a ((data structure)) is called a _((syntax tree))_. If you -imagine the objects as dots and the links between them as lines -between those dots, it has a ((tree))like shape. The fact that -expressions contain other expressions, which in turn might contain -more expressions, is similar to the way branches split and split again. - -image::img/syntax_tree.svg[alt="The structure of a syntax tree",width="5cm"] - -(((parsing)))Contrast this to the parser we wrote for the -configuration file format in link:09_regexp.html#ini[Chapter 9], which -had a simple structure: it split the input into lines and -handled those lines one at a time. There were only a few simple forms -that a line was allowed to have. - -(((recursion)))(((nesting,of expressions)))Here we must find a -different approach. Expressions are not separated into lines, and they -have a recursive structure. 
Application expressions _contain_ other -expressions. - -(((elegance)))Fortunately, this problem can be solved elegantly by -writing a parser function that is recursive in a way that reflects the -recursive nature of the language. - -(((parseExpression function)))(((syntax tree)))We define a function -`parseExpression`, which takes a string as input and returns an -object containing the data structure for the expression at the start -of the string, along with the part of the string left after parsing -this expression. When parsing subexpressions (the argument to an -application, for example), this function can be called again, yielding -the argument expression as well as the text that remains. This text -may in turn contain more arguments or may be the closing parenthesis -that ends the list of arguments. - -This is the first part of the parser: - -// include_code - -[source,javascript] ----- -function parseExpression(program) { - program = skipSpace(program); - var match, expr; - if (match = /^"([^"]*)"/.exec(program)) - expr = {type: "value", value: match[1]}; - else if (match = /^\d+\b/.exec(program)) - expr = {type: "value", value: Number(match[0])}; - else if (match = /^[^\s(),"]+/.exec(program)) - expr = {type: "word", name: match[0]}; - else - throw new SyntaxError("Unexpected syntax: " + program); - - return parseApply(expr, program.slice(match[0].length)); -} - -function skipSpace(string) { - var first = string.search(/\S/); - if (first == -1) return ""; - return string.slice(first); -} ----- - -(((skipSpace function)))Because Egg allows any amount of -((whitespace)) between its elements, we have to repeatedly cut the -whitespace off the start of the program string. This is what the -`skipSpace` function helps with. - -(((literal expression)))(((SyntaxError type)))After skipping any -leading space, `parseExpression` uses three ((regular expression))s to -spot the three simple (atomic) elements that Egg supports: strings, -numbers, and words. 
The parser constructs a different kind of data -structure depending on which one matches. If the input does not match -one of these three forms, it is -not a valid expression, and the parser throws an error. `SyntaxError` is a -standard error object type, which is raised when an attempt is made to -run an invalid JavaScript program. - -(((parseApply function)))We can then cut off the part that we matched -from the program string and pass that, along with the object for the -expression, to `parseApply`, which checks whether the expression is an -application. If so, it parses a parenthesized list of arguments. - -// include_code - -[source,javascript] ----- -function parseApply(expr, program) { - program = skipSpace(program); - if (program[0] != "(") - return {expr: expr, rest: program}; - - program = skipSpace(program.slice(1)); - expr = {type: "apply", operator: expr, args: []}; - while (program[0] != ")") { - var arg = parseExpression(program); - expr.args.push(arg.expr); - program = skipSpace(arg.rest); - if (program[0] == ",") - program = skipSpace(program.slice(1)); - else if (program[0] != ")") - throw new SyntaxError("Expected ',' or ')'"); - } - return parseApply(expr, program.slice(1)); -} ----- - -(((parsing)))If the next character in the program is not an opening -parenthesis, this is not an application, and `parseApply` simply -returns the expression it was given. - -(((recursion)))Otherwise, it skips the opening parenthesis and -creates the ((syntax tree)) object for this application expression. It -then recursively calls `parseExpression` to parse each argument until a -closing parenthesis is found. The recursion is indirect, through -`parseApply` and `parseExpression` calling each other. - -Because an application expression can itself be applied (such as in -`multiplier(2)(1)`), `parseApply` must, after it has parsed an -application, call itself again to check whether another pair of -parentheses follows. 
- -(((syntax tree)))(((Egg language)))(((parse function)))This is all we -need to parse Egg. We wrap it in a convenient `parse` function that -verifies that it has reached the end of the input string after parsing -the expression (an Egg program is a single expression), and that -gives us the program's data structure. - -// include_code strip_log -// test: join - -[source,javascript] ----- -function parse(program) { - var result = parseExpression(program); - if (skipSpace(result.rest).length > 0) - throw new SyntaxError("Unexpected text after program"); - return result.expr; -} - -console.log(parse("+(a, 10)")); -// → {type: "apply", -// operator: {type: "word", name: "+"}, -// args: [{type: "word", name: "a"}, -// {type: "value", value: 10}]} ----- - -(((error message)))It works! It doesn't give us very helpful -information when it fails and doesn't store the line and column on -which each expression starts, which might be helpful when reporting -errors later, but it's good enough for our purposes. - -== The evaluator == - -(((evaluate function)))(((evaluation)))(((interpretation)))(((syntax -tree)))(((Egg language)))What can we do with the syntax tree for a -program? Run it, of course! And that is what the evaluator does. You -give it a syntax tree and an environment object that associates names -with values, and it will evaluate the expression that the tree -represents and return the value that this produces. 
- -// include_code - -[source,javascript] ----- -function evaluate(expr, env) { - switch(expr.type) { - case "value": - return expr.value; - - case "word": - if (expr.name in env) - return env[expr.name]; - else - throw new ReferenceError("Undefined variable: " + - expr.name); - case "apply": - if (expr.operator.type == "word" && - expr.operator.name in specialForms) - return specialForms[expr.operator.name](expr.args, - env); - var op = evaluate(expr.operator, env); - if (typeof op != "function") - throw new TypeError("Applying a non-function."); - return op.apply(null, expr.args.map(function(arg) { - return evaluate(arg, env); - })); - } -} - -var specialForms = Object.create(null); ----- - -(((literal expression)))(((environment)))The evaluator has code for -each of the ((expression)) types. A literal value expression simply -produces its value. (For example, the expression `100` just evaluates -to the number 100.) For a variable, we must check whether it is -actually defined in the environment and, if it is, fetch the -variable's value. - -(((function,application)))Applications are more involved. If they are -a ((special form)), like `if`, we do not evaluate anything and simply -pass the argument expressions, along with the environment, to the -function that handles this form. If it is a normal call, we evaluate -the operator, verify that it is a function, and call it with the -result of evaluating the arguments. - -We will use plain JavaScript function values to represent Egg's -function values. We will come back to this -link:11_language.html#egg_fun[later], when the special form called -`fun` is defined. - -(((readability)))(((evaluate -function)))(((recursion)))(((parsing)))The recursive structure of -`evaluate` resembles the similar structure of the parser. Both mirror -the structure of the language itself. 
It would also be possible to -integrate the parser with the evaluator and evaluate during parsing, -but splitting them up this way makes the program more readable. - -(((Egg language)))(((interpretation)))This is really all that is -needed to interpret Egg. It is that simple. But without defining a few -special forms and adding some useful values to the ((environment)), -you can't do anything with this language yet. - -== Special forms == - -(((special form)))(((specialForms object)))The `specialForms` object -is used to define special syntax in Egg. It associates words with -functions that evaluate such special forms. It is currently empty. -Let's add some forms. - -// include_code - -[source,javascript] ----- -specialForms["if"] = function(args, env) { - if (args.length != 3) - throw new SyntaxError("Bad number of args to if"); - - if (evaluate(args[0], env) !== false) - return evaluate(args[1], env); - else - return evaluate(args[2], env); -}; ----- - -(((conditional execution)))Egg's `if` construct expects exactly three -arguments. It will evaluate the first, and if the result isn't the -value `false`, it will evaluate the second. Otherwise, the third gets -evaluated. This `if` form is more similar to JavaScript's ternary `?:` -operator than to JavaScript's `if`. It is an expression, not a statement, -and it produces a value, namely, the result of the second or third -argument. - -(((Boolean)))Egg differs from JavaScript in how it handles the -condition value to `if`. It will not treat things like zero or the -empty string as false, but only the precise value `false`. - -(((short-circuit evaluation)))The reason we need to represent `if` as -a special form, rather than a regular function, is that all arguments -to functions are evaluated before the function is called, whereas -`if` should evaluate only _either_ its second or its third argument, -depending on the value of the first. - -The `while` form is similar. 
- -// include_code - -[source,javascript] ----- -specialForms["while"] = function(args, env) { - if (args.length != 2) - throw new SyntaxError("Bad number of args to while"); - - while (evaluate(args[0], env) !== false) - evaluate(args[1], env); - - // Since undefined does not exist in Egg, we return false, - // for lack of a meaningful result. - return false; -}; ----- - -Another basic building block is `do`, which executes all its arguments -from top to bottom. Its value is the value produced by the last -argument. - -// include_code - -[source,javascript] ----- -specialForms["do"] = function(args, env) { - var value = false; - args.forEach(function(arg) { - value = evaluate(arg, env); - }); - return value; -}; ----- - -(((= operator)))To be able to create ((variable))s and give them new -values, we also create a form called `define`. It expects a word as -its first argument and an expression producing the value to assign to -that word as its second argument. Since `define`, like everything, is -an expression, it must return a value. We'll make it return the value -that was assigned (just like JavaScript's `=` operator). - -// include_code - -[source,javascript] ----- -specialForms["define"] = function(args, env) { - if (args.length != 2 || args[0].type != "word") - throw new SyntaxError("Bad use of define"); - var value = evaluate(args[1], env); - env[args[0].name] = value; - return value; -}; ----- - -== The environment == - -(((Egg language)))(((evaluate function)))The ((environment)) accepted -by `evaluate` is an object with properties whose names correspond to -variable names and whose values correspond to the values those -((variable))s are bound to. Let's define an environment object to -represent the ((global scope)). - -To be able to use the `if` construct we just defined, we must -have access to ((Boolean)) values. Since there are only two -Boolean values, we do not need special syntax for them. 
We simply bind -two variables to the values `true` and `false` and use those. - -// include_code - -[source,javascript] ----- -var topEnv = Object.create(null); - -topEnv["true"] = true; -topEnv["false"] = false; ----- - -We can now evaluate a simple expression that negates a Boolean value. - -[source,javascript] ----- -var prog = parse("if(true, false, true)"); -console.log(evaluate(prog, topEnv)); -// → false ----- - -(((arithmetic)))(((Function constructor)))To supply basic -((arithmetic)) and ((comparison)) ((operator))s, we will also add some -function values to the ((environment)). In the interest of keeping the -code short, we'll use `new Function` to synthesize a bunch of operator -functions in a loop, rather than defining them all individually. - -// include_code - -[source,javascript] ----- -["+", "-", "*", "/", "==", "<", ">"].forEach(function(op) { - topEnv[op] = new Function("a, b", "return a " + op + " b;"); -}); ----- - -A way to ((output)) values is also very useful, so we'll wrap -`console.log` in a function and call it `print`. - -// include_code - -[source,javascript] ----- -topEnv["print"] = function(value) { - console.log(value); - return value; -}; ----- - -(((parsing)))(((run function)))That gives us enough elementary tools -to write simple programs. The following `run` function provides a -convenient way to write and run them. It creates a fresh environment -and parses and evaluates the strings we give it as a single program. - -// include_code - -[source,javascript] ----- -function run() { - var env = Object.create(topEnv); - var program = Array.prototype.slice - .call(arguments, 0).join("\n"); - return evaluate(parse(program), env); -} ----- - -(((join method)))(((call method)))The use of -`Array.prototype.slice.call` is a trick to turn an ((array-like -object)), such as `arguments`, into a real array so that we can call -`join` on it. It takes all the arguments given to `run` and treats -them as the lines of a program. 
- -[source,javascript] ----- -run("do(define(total, 0),", - " define(count, 1),", - " while(<(count, 11),", - " do(define(total, +(total, count)),", - " define(count, +(count, 1)))),", - " print(total))"); -// → 55 ----- - -(((summing example)))(((Egg language)))This is the program we've seen -several times before, which computes the sum of the numbers 1 to 10, -expressed in Egg. It is clearly uglier than the equivalent JavaScript -program but not bad for a language implemented in less than 150 -((lines of code)). - -[[egg_fun]] -== Functions == - -(((function)))(((Egg language)))A programming language without -functions is a poor programming language indeed. - -Fortunately, it is not hard to add a `fun` construct, which treats its -last argument as the function's body and treats all the arguments before that as -the names of the function's arguments. - -// include_code - -[source,javascript] ----- -specialForms["fun"] = function(args, env) { - if (!args.length) - throw new SyntaxError("Functions need a body"); - function name(expr) { - if (expr.type != "word") - throw new SyntaxError("Arg names must be words"); - return expr.name; - } - var argNames = args.slice(0, args.length - 1).map(name); - var body = args[args.length - 1]; - - return function() { - if (arguments.length != argNames.length) - throw new TypeError("Wrong number of arguments"); - var localEnv = Object.create(env); - for (var i = 0; i < arguments.length; i++) - localEnv[argNames[i]] = arguments[i]; - return evaluate(body, localEnv); - }; -}; ----- - -(((local scope)))(((Object.create function)))(((prototype)))Functions -in Egg have their own local environment, just like in JavaScript. We -use `Object.create` to make a new object that has access to the -variables in the outer environment (its prototype) but that can also -contain new variables without modifying that outer scope. 
- -(((power example)))(((evaluation)))(((interpretation)))The function -created by the `fun` form creates this local environment and adds the -argument variables to it. It then evaluates the function body in this -environment and returns the result. - -// start_code - -[source,javascript] ----- -run("do(define(plusOne, fun(a, +(a, 1))),", - " print(plusOne(10)))"); -// → 11 - -run("do(define(pow, fun(base, exp,", - " if(==(exp, 0),", - " 1,", - " *(base, pow(base, -(exp, 1)))))),", - " print(pow(2, 10)))"); -// → 1024 ----- - -== Compilation == - -(((interpretation)))(((compilation)))What we have built is an -interpreter. During evaluation, it acts directly on the representation -of the program produced by the parser. - -(((efficiency)))(((performance)))_Compilation_ is the process of -adding another step between the parsing and the running of a program, -which transforms the program into something that can be evaluated more -efficiently by doing as much work as possible in advance. For example, -in well-designed languages it is obvious, for each use of a -((variable)), which variable is being referred to, without actually -running the program. This can be used to avoid looking up the variable -by name every time it is accessed and to directly fetch it from some -predetermined ((memory)) location. - -Traditionally, ((compilation)) involves converting the program to -((machine code)), the raw format that a computer's processor can -execute. But any process that converts a program to a different -representation can be thought of as compilation. - -(((simplicity)))(((Function constructor)))(((transpilation)))It would -be possible to write an alternative ((evaluation)) strategy for Egg, -one that first converts the program to a JavaScript program, uses `new -Function` to invoke the JavaScript compiler on it, and then runs the -result. When done right, this would make Egg run very fast while -still being quite simple to implement. 
- -If you are interested in this topic and willing to spend some time on -it, I encourage you to try to implement such a compiler as an -exercise. - -== Cheating == - -(((Egg language)))When we defined `if` and `while`, you probably -noticed that they were more or less trivial wrappers around -JavaScript's own `if` and `while`. Similarly, the values in Egg are -just regular old JavaScript values. - -If you compare the implementation of Egg, built on top of JavaScript, -with the amount of work and complexity required to build a programming -language directly on the raw functionality provided by a machine, the -difference is huge. Regardless, this example hopefully gave you an -impression of the way ((programming language))s work. - -And when it comes to getting something done, cheating is more -effective than doing everything yourself. Though the toy language in -this chapter doesn't do anything that couldn't be done better in -JavaScript, there _are_ situations where writing small languages helps -get real work done. - -Such a language does not have to resemble a typical programming -language. If JavaScript didn't come equipped with regular expressions, -you could write your own parser and evaluator for such a sublanguage. - -(((artificial intelligence)))Or imagine you are building a giant -robotic ((dinosaur)) and need to program its ((behavior)). JavaScript -might not be the most effective way to do this. You might instead opt -for a language that looks like this: - ----- -behavior walk - perform when - destination ahead - actions - move left-foot - move right-foot - -behavior attack - perform when - Godzilla in-view - actions - fire laser-eyes - launch arm-rockets ----- - -(((expressivity)))This is what is usually called a _((domain-specific -language))_, a language tailored to express a narrow domain of -knowledge. 
Such a language can be more expressive than a -general-purpose language because it is designed to express exactly the -things that need expressing in its domain and nothing else. - -== Exercises == - -=== Arrays === - -(((Egg language)))Add support for ((array))s to Egg by adding the -following three functions to the top scope: `array(...)` to -construct an array containing the argument values, `length(array)` to -get an array's length, and `element(array, n)` to fetch the n^th^ -element from an array. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Modify these definitions... - -topEnv["array"] = "..."; - -topEnv["length"] = "..."; - -topEnv["element"] = "..."; - -run("do(define(sum, fun(array,", - " do(define(i, 0),", - " define(sum, 0),", - " while(<(i, length(array)),", - " do(define(sum, +(sum, element(array, i))),", - " define(i, +(i, 1)))),", - " sum))),", - " print(sum(array(1, 2, 3))))"); -// → 6 ----- -endif::interactive_target[] - -!!hint!! - -The easiest way to do this is to represent Egg arrays -with JavaScript arrays. - -(((slice method)))The values added to the top environment must be -functions. `Array.prototype.slice` can be used to convert an -`arguments` array-like object into a regular array. - -!!hint!! - -=== Closure === - -(((closure)))(((function,scope)))The way we have defined `fun` allows -functions in Egg to “close over” the surrounding environment, allowing -the function's body to use local values that were visible at the time -the function was defined, just like JavaScript functions do. - -The following program illustrates this: function `f` returns a function -that adds its argument to `f`'s argument, meaning that it needs access -to the local ((scope)) inside `f` to be able to use variable `a`. - -[source,javascript] ----- -run("do(define(f, fun(a, fun(b, +(a, b)))),", - " print(f(4)(5)))"); -// → 9 ----- - -Go back to the definition of the `fun` form and explain which -mechanism causes this to work. 
- -!!hint!! - -(((closure)))Again, we are riding along on a JavaScript mechanism to -get the equivalent feature in Egg. Special forms are passed the local -environment in which they are evaluated so that they can evaluate -their subforms in that environment. The function returned by `fun` -closes over the `env` argument given to its enclosing function and -uses that to create the function's local ((environment)) when it is -called. - -(((compilation)))This means that the ((prototype)) of the local -environment will be the environment in which the function was created, -which makes it possible to access variables in that environment from -the function. This is all there is to implementing closure (though to -compile it in a way that is actually efficient, you'd need to do some -more work). - -!!hint!! - -=== Comments === - -(((hash character)))(((Egg language)))It would be nice if we could -write ((comment))s in Egg. For example, whenever we find a hash sign -(`#`), we could treat the rest of the line as a comment and ignore it, -similar to `//` in JavaScript. - -(((skipSpace function)))We do not have to make any big changes to the -parser to support this. We can simply change `skipSpace` to skip -comments like they are ((whitespace)) so that all the points where -`skipSpace` is called will now also skip comments. Make this change. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// This is the old skipSpace. Modify it... -function skipSpace(string) { - var first = string.search(/\S/); - if (first == -1) return ""; - return string.slice(first); -} - -console.log(parse("# hello\nx")); -// → {type: "word", name: "x"} - -console.log(parse("a # one\n # two\n()")); -// → {type: "apply", -// operator: {type: "word", name: "a"}, -// args: []} ----- -endif::interactive_target[] - -!!hint!! - -(((comment)))Make sure your solution handles multiple comments in a -row, with potentially ((whitespace)) between or after them. 
- -A ((regular expression)) is probably the easiest way to solve this. -Write something that matches “whitespace or a comment, zero or more -times”. Use the `exec` or `match` method and look at the length of -the first element in the returned array (the whole match) to find out -how many characters to slice off. - -!!hint!! - -=== Fixing scope === - -(((variable,definition)))(((assignment)))Currently, the only way to -assign a ((variable)) a value is `define`. This construct acts as -a way both to define new variables and to give existing ones a new value. - -(((local variable)))This ((ambiguity)) causes a problem. When you try -to give a nonlocal variable a new value, you will end up defining a -local one with the same name instead. (Some languages work like this -by design, but I've always found it a silly way to handle ((scope)).) - -(((ReferenceError type)))Add a special form `set`, similar to -`define`, which gives a variable a new value, updating the variable in -an outer scope if it doesn't already exist in the inner scope. If the -variable is not defined at all, throw a `ReferenceError` (which is -another standard error type). - -(((hasOwnProperty method)))(((prototype)))(((getPrototypeOf -function)))The technique of representing scopes as simple objects, -which has made things convenient so far, will get in your way a -little at this point. You might want to use the -`Object.getPrototypeOf` function, which returns the prototype of an -object. Also remember that scopes do not derive from -`Object.prototype`, so if you want to call `hasOwnProperty` on them, -you have to use this clumsy expression: - -// test: no - -[source,javascript] ----- -Object.prototype.hasOwnProperty.call(scope, name); ----- - -This fetches the `hasOwnProperty` method from the `Object` prototype -and then calls it on a scope object. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -specialForms["set"] = function(args, env) { - // Your code here. 
-}; - -run("do(define(x, 4),", - " define(setx, fun(val, set(x, val))),", - " setx(50),", - " print(x))"); -// → 50 -run("set(quux, true)"); -// → Some kind of ReferenceError ----- -endif::interactive_target[] - -!!hint!! - -(((variable,definition)))(((assignment)))(((getPrototypeOf -function)))(((hasOwnProperty method)))You will have to loop through -one ((scope)) at a time, using `Object.getPrototypeOf` to go to the next -outer scope. For each scope, use `hasOwnProperty` to find out whether the -variable, indicated by the `name` property of the first argument to -`set`, exists in that scope. If it does, set it to the result of -evaluating the second argument to `set` and then return that value. - -(((global scope)))(((run-time error)))If the outermost scope is -reached (`Object.getPrototypeOf` returns null) and we haven't found -the variable yet, it doesn't exist, and an error should be thrown. - -!!hint!! diff --git a/12_browser.txt b/12_browser.txt deleted file mode 100644 index 2bf182345..000000000 --- a/12_browser.txt +++ /dev/null @@ -1,396 +0,0 @@ -:chap_num: 12 -:prev_link: 11_language -:next_link: 13_dom - -= JavaScript and the Browser = - -[chapterquote="true"] -[quote,Douglas Crockford,The JavaScript Programming Language (video lecture)] -____ -The browser is a really hostile programming environment. -____ - -(((Crockford+++,+++ Douglas)))(((JavaScript,history of)))(((World Wide -Web)))The next part of this book will talk about web browsers. Without -web ((browser))s, there would be no JavaScript. And even if there -were, no one would ever have paid any attention to it. - -(((decentralization)))(((compatibility)))Web technology has, from the -start, been decentralized, not just technically but also in the -way it has evolved. Various browser vendors have added new -functionality in ad hoc and sometimes poorly thought out ways, which -then sometimes ended up being adopted by others and finally set down -as a ((standard)). 
- -This is both a blessing and a curse. On the one hand, it is empowering -to not have a central party control a system but have it be improved -by various parties working in loose ((collaboration)) (or, -occasionally, open hostility). On the other hand, the haphazard way in -which the Web was developed means that the resulting system is not -exactly a shining example of internal ((consistency)). In fact, some -parts of it are downright messy and confusing. - -== Networks and the Internet == - -Computer ((network))s have been around since the 1950s. If you put -cables between two or more computers and allow them to send data back -and forth through these cables, you can do all kinds of wonderful -things. - -If connecting two machines in the same building allows us to do -wonderful things, connecting machines all over the planet should be -even better. The technology to start implementing this vision was -developed in the 1980s, and the resulting network is called the -_((Internet))_. It has lived up to its promise. - -A computer can use this network to spew bits at another computer. For -any effective ((communication)) to arise out of this bit-spewing, the -computers at both ends must know what the bits are supposed to -represent. The meaning of any given sequence of bits depends entirely -on the kind of thing that it is trying to express and on the -((encoding)) mechanism used. - -A _network ((protocol))_ describes a style of communication over a -((network)). There are protocols for sending email, for fetching email, -for sharing files, or even for controlling computers that happen to be -infected by malicious software. - -For example, a simple ((chat)) protocol might consist of one computer -sending the bits that represent the text “CHAT?” to another machine -and the other responding with “OK!” to confirm that it understands the -protocol. 
They can then proceed to send each other strings of text, -read the text sent by the other from the network, and display whatever -they receive on their screens. - -(((layering)))(((stream)))(((ordering)))Most protocols are built on -top of other protocols. Our example chat protocol treats the network -as a streamlike device into which you can put bits and have them -arrive at the correct destination in the correct order. Ensuring those -things is already a rather difficult technical problem. - -indexsee:[Transmission Control Protocol,TCP] -(((TCP)))The _Transmission Control Protocol_ (TCP) is a ((protocol)) that -solves this problem. All Internet-connected devices “speak” it, and -most communication on the ((Internet)) is built on top of it. - -(((listening (TCP))))A TCP ((connection)) works as follows: one -computer must be waiting, or _listening_, for other computers to start -talking to it. To be able to listen for different kinds of -communication at the same time on a single machine, each listener has -a number (called a _((port))_) associated with it. Most ((protocol))s -specify which port should be used by default. For example, when we -want to send an email using the ((SMTP)) protocol, the machine through -which we send it is expected to be listening on port 25. - -Another computer can then establish a ((connection)) by connecting to -the target machine using the correct port number. If the target -machine can be reached and is listening on that port, the connection -is successfully created. The listening computer is called the -_((server))_, and the connecting computer is called the _((client))_. - -Such a connection acts as a two-way ((pipe)) through which bits can -flow—the machines on both ends can put data into it. Once the bits are -successfully transmitted, they can be read out again by the machine on -the other side. This is a convenient model. You could say that -((TCP)) provides an ((abstraction)) of the network. 
- -[[web]] -== The Web == - -The _((World Wide Web))_ (not to be confused with the ((Internet)) as -a whole) is a set of ((protocol))s and formats that allow us to visit -web pages in a browser. The “Web” part in the name refers to the fact -that such pages can easily link to each other, thus connecting into a -huge ((mesh)) that users can move through. - -indexsee:[Hypertext Transfer Protocol,HTTP] -(((HTTP)))To add content to the Web, all you need to do is connect a machine to -the ((Internet)), and have it listen on port 80, using the -_Hypertext Transfer Protocol_ (HTTP). This protocol allows other computers -to request documents over the ((network)). - -indexsee:[Uniform Resource Locator,URL] -(((URL)))Each ((document)) on the Web is named by a _Uniform Resource -Locator_ (URL), which looks something like this: - ----- - http://eloquentjavascript.net/12_browser.html - | | | | - protocol server path ----- - -(((HTTPS)))The first part tells us that this URL uses the HTTP -((protocol)) (as opposed to, for example, encrypted HTTP, which would -be _https://_). Then comes the part that identifies which ((server)) -we are requesting the document from. Last is a path string that -identifies the specific document (or _((resource))_) we are interested -in. - -Each machine connected to the Internet gets a unique _((IP address))_, -which looks something like `37.187.37.82`. You can use these directly -as the server part of a ((URL)). But lists of more or less random -numbers are hard to remember and awkward to type, so you can instead -register a _((domain)) name_ to point toward a specific machine or -set of machines. I registered _eloquentjavascript.net_ to point at the -IP address of a machine I control and can thus use that domain name -to serve web pages. - -(((browser)))If you type the previous URL into your browser's ((address -bar)), it will try to retrieve and display the ((document)) at that -URL. 
First, your browser has to find out what address -_eloquentjavascript.net_ refers to. Then, using the ((HTTP)) protocol, -it makes a connection to the server at that address and asks for the -resource _/12_browser.html_. - -We will take a closer look at the HTTP protocol in -link:17_http.html#http[Chapter 17]. - -== HTML == - -indexsee:[Hypertext Markup Language,HTML] -(((HTML)))HTML, which stands for _Hypertext Markup Language_, is the -document format used for web pages. An HTML document contains -((text)), as well as _((tag))s_ that give structure to the text, -describing things such as links, paragraphs, and headings. - -A simple HTML document looks like this: - -[source,text/html] ----- -<!doctype html> -<html> - <head> - <title>My home page</title> - </head> - <body> - <h1>My home page</h1> - <p>Hello, I am Marijn and this is my home page.</p> - <p>I also wrote a book! Read it - <a href="http://eloquentjavascript.net">here</a>.</p> - </body> -</html> ----- - -ifdef::book_target[] - -This is what such a document would look like in the browser: - -image::img/home-page.png[alt="My home page",width="6.3cm"] - -endif::book_target[] - -(((angle brackets)))The tags, wrapped in angle brackets (`<` -and `>`), provide information about the ((structure)) of the -document. The other ((text)) is just plain text. - -(((doctype)))(((version)))The document starts with `<!doctype html>`, -which tells the browser to interpret it as _modern_ HTML, as opposed -to various dialects that were in use in the past. - -(((head (HTML tag))))(((body (HTML tag))))(((title (HTML tag))))(((h1 -(HTML tag))))(((p (HTML tag))))HTML documents have a head and a body. -The head contains information _about_ the document, and the body -contains the document itself. In this case, we first declared that the -title of this document is “My home page” and then gave a document -containing a heading (`<h1>`, meaning “heading 1”—++<h2>++ to `<h6>` -produce more minor headings) and two ((paragraph))s (`<p>`). - -(((href attribute)))(((a (HTML tag))))Tags come in several forms. An -((element)), such as the body, a paragraph, or a link, is started by -an _((opening tag))_ like `<p>` and ended by a _((closing tag))_ like -`</p>`. Some opening tags, such as the one for the ((link)) (`<a>`), -contain extra information in the form of `name="value"` pairs. These -are called _((attribute))s_. In this case, the destination of the link -is indicated with `href="http://eloquentjavascript.net"`, where `href` -stands for “hypertext reference”. - -(((src attribute)))(((self-closing tag)))(((img (HTML tag))))Some -kinds of ((tag))s do not enclose anything and thus do not need to be -closed. An example of this would be `<img src="http://example.com/image.jpg">`, which will display the ((image)) -found at the given source URL. - -(((escaping,in HTML)))To be able to include ((angle brackets)) in -the text of a document, even though they have a special meaning in -HTML, yet another form of special notation has to be introduced. A -plain opening angle bracket is written as `&lt;` (“less than”), and -a closing bracket is written as `&gt;` (“greater than”). In HTML, an ampersand -(`&`) character followed by a word and a semicolon (`;`) is called an -_((entity))_, and will be replaced by the character it encodes. - -(((backslash character)))(((ampersand character)))(((double-quote -character)))This is analogous to the way backslashes are used in -JavaScript strings. Since this mechanism gives ampersand characters a -special meaning, too, those need to be escaped as `&amp;`. Inside an -attribute, which is wrapped in double quotes, `&quot;` can be used to -insert an actual quote character. - -(((error tolerance)))(((parsing)))HTML is parsed in a remarkably -error-tolerant way. When tags that should be there are missing, the -browser reconstructs them. The way in which this is done has been -standardized, and you can rely on all modern browsers to do it in the -same way. - -The following document will be treated just like the one shown previously: - -[source,text/html] ----- -<!doctype html> - -<title>My home page</title> - -<h1>My home page</h1> -<p>Hello, I am Marijn and this is my home page. -<p>I also wrote a book! Read it - <a href=http://eloquentjavascript.net>here</a>. ----- - -(((title (HTML tag))))(((head (HTML tag))))(((body (HTML -tag))))(((html (HTML tag))))The `<html>`, `<head>`, and `<body>` tags -are gone completely. The browser knows that `<title>` belongs in a -head, and that `<h1>` in a body. Furthermore, I am no longer explicitly -closing the paragraphs since opening a new paragraph or ending the -document will close them implicitly. The quotes around the link target -are also gone. - -This book will usually omit the `<html>`, `<head>`, and `<body>` tags -from examples to keep them short and free of clutter. But I _will_ -close tags and include quotes around attributes. - -(((browser)))I will also usually omit the ((doctype)). This is not to -be taken as an encouragement to omit doctype declarations. Browsers -will often do ridiculous things when you forget them. You should -consider doctypes implicitly present in examples, even when they are -not actually shown in the text. - -[[script_tag]] -== HTML and JavaScript == - -(((JavaScript,in HTML)))(((script (HTML tag))))In the context of this -book, the most important ((HTML)) tag is `<script>`. This tag allows -us to include a piece of JavaScript in a document. - -[source,text/html] ----- -<h1>Testing alert</h1> -<script>alert("hello!");</script> ----- - -(((alert function)))(((timeline)))Such a script will run as soon as -its `<script>` tag is encountered as the browser reads the HTML. The -page shown earlier will pop up an `alert` dialog when opened. - -(((src attribute)))Including large programs directly in HTML documents -is often impractical. The `<script>` tag can be given an `src` -attribute in order to fetch a script file (a text file containing a -JavaScript program) from a URL. - -[source,text/html] ----- -<h1>Testing alert</h1> -<script src="code/hello.js"></script> ----- - -The _code/hello.js_ file included here contains the same simple program, -`alert("hello!")`. 
When an HTML page references other URLs as part of -itself—for example, an image file or a script—web browsers will -retrieve them immediately and include them in the page. - -(((script (HTML tag))))(((closing tag)))A script tag must always be -closed with `</script>`, even if it refers to a script file and -doesn't contain any code. If you forget this, the rest of the page -will be interpreted as part of the script. - -(((button (HTML tag))))(((onclick attribute)))Some attributes can also -contain a JavaScript program. The `<button>` tag shown next (which shows up -as a button) has an `onclick` attribute, whose content will be run -whenever the button is clicked. - -[source,text/html] ----- -<button onclick="alert('Boom!');">DO NOT PRESS</button> ----- - -(((single-quote character)))(((escaping,in HTML)))Note that I had to -use single quotes for the string in the `onclick` attribute because -double quotes are already used to quote the whole attribute. I could -also have used `&quot;`, but that'd make the program harder to read. - -== In the sandbox == - -(((malicious script)))(((World Wide -Web)))(((browser)))(((website)))(((security)))Running programs -downloaded from the ((Internet)) is potentially dangerous. You do not -know much about the people behind most sites you visit, and they do -not necessarily mean well. Running programs by people who do not mean -well is how you get your computer infected by ((virus))es, your data -stolen, and your accounts hacked. - -Yet the attraction of the Web is that you can surf it without -necessarily ((trust))ing all the pages you visit. This is why browsers -severely limit the things a JavaScript program may do: it can't look -at the files on your computer or modify anything not related to the -web page it was embedded in. - -(((isolation)))Isolating a programming environment in this way is -called _((sandbox))ing_, the idea being that the program is harmlessly -playing in a sandbox. 
But you should imagine this particular kind of -sandbox as having a cage of thick steel bars over it, which makes it -somewhat different from your typical playground sandbox. - -The hard part of sandboxing is allowing the programs enough room to be -useful yet at the same time restricting them from doing anything -dangerous. Lots of useful functionality, such as communicating with -other servers or reading the content of the copy-paste ((clipboard)), -can also be used to do problematic, ((privacy))-invading things. - -(((leak)))(((exploit)))(((security)))Every now and then, someone comes -up with a new way to circumvent the limitations of a ((browser)) and -do something harmful, ranging from leaking minor private information -to taking over the whole machine that the browser runs on. The browser -developers respond by fixing the hole, and all is well again—that is, -until the next problem is discovered, and hopefully publicized, rather -than secretly exploited by some government or ((mafia)). - -== Compatibility and the browser wars == - -(((Microsoft)))(((World Wide Web)))In the early stages of the -Web, a browser called ((Mosaic)) dominated the market. After a few -years, the balance had shifted to ((Netscape)), which was then, in -turn, largely supplanted by Microsoft's ((Internet Explorer)). At any -point where a single ((browser)) was dominant, that browser's vendor -would feel entitled to unilaterally invent new features for the Web. -Since most users used the same browser, ((website))s would simply -start using those features—never mind the other browsers. - -This was the dark age of ((compatibility)), often called the -_((browser wars))_. Web developers were left with not one unified Web -but two or three incompatible platforms. To make things worse, the -browsers in use around 2003 were all full of ((bug))s, and of course -the bugs were different for each ((browser)). Life was hard for people -writing web pages. 
- -(((Apple)))(((Internet Explorer)))(((Mozilla)))Mozilla ((Firefox)), a -not-for-profit offshoot of ((Netscape)), challenged Internet -Explorer's hegemony in the late 2000s. Because ((Microsoft)) was not -particularly interested in staying competitive at the time, Firefox -took quite a chunk of market share away from it. Around the same -time, ((Google)) introduced its ((Chrome)) browser, and Apple's -((Safari)) browser gained popularity, leading to a situation where -there were four major players, rather than one. - -(((compatibility)))The new players had a more serious attitude toward -((standards)) and better ((engineering)) practices, leading to less -incompatibility and fewer ((bug))s. Microsoft, seeing its market share -crumble, came around and adopted these attitudes. If you are starting -to learn web development today, consider yourself lucky. The latest -versions of the major browsers behave quite uniformly and have -relatively few bugs. - -(((World Wide Web)))That is not to say that the situation is perfect -just yet. Some of the people using the Web are, for reasons of inertia -or corporate policy, stuck with very old ((browser))s. Until those -browsers die out entirely, writing websites that work for them will -require a lot of arcane knowledge about their shortcomings and quirks. -This book is not about those ((quirks)). Rather, it aims to present -the modern, sane style of ((web programming)). diff --git a/13_dom.txt b/13_dom.txt deleted file mode 100644 index b0d1bb6b0..000000000 --- a/13_dom.txt +++ /dev/null @@ -1,1171 +0,0 @@ -:chap_num: 13 -:prev_link: 12_browser -:next_link: 14_event -:load_files: ["code/mountains.js", "code/chapter/13_dom.js"] - -= The Document Object Model = - -(((drawing)))(((parsing)))When you open a web page in your browser, the browser -retrieves the page's ((HTML)) text and parses it, much like the way -our parser from link:11_language.html#parsing[Chapter 11] parsed -programs. 
The browser builds up a model of the document's -((structure)) and then uses this model to draw the page on the screen. - -(((live data structure)))This representation of the ((document)) -is one of the toys that a JavaScript program has -available in its ((sandbox)). You can read from the model and also change it. It acts as a -_live_ data structure: when it is modified, the page on the screen is -updated to reflect the changes. - -== Document structure == - -You can imagine an ((HTML)) document as a nested set of ((box))es. -Tags such as `<body>` and `</body>` enclose other ((tag))s, which in -turn contain other tags or ((text)). Here's the example document from -the link:12_browser.html#browser[previous chapter]: - -[sandbox="homepage"] -[source,text/html] ----- -<!doctype html> -<html> - <head> - <title>My home page</title> - </head> - <body> - <h1>My home page</h1> - <p>Hello, I am Marijn and this is my home page.</p> - <p>I also wrote a book! Read it - <a href="http://eloquentjavascript.net">here</a>.</p>
    - - ----- - -This page has the following structure: - -image::img/html-boxes.svg[alt="HTML document as nested boxes",width="7cm"] - -indexsee:[Document Object Model,DOM] -The data structure the browser uses to represent the document follows -this shape. For each box, there is an ((object)), which we can -interact with to find out things such as what HTML tag it represents and -which boxes and text it contains. This representation is called the -_Document Object Model_, or ((DOM)) for short. - -(((documentElement property)))(((head property)))(((body -property)))(((html (HTML tag))))(((body (HTML tag))))(((head (HTML -tag))))The global variable `document` gives us access to these -objects. Its `documentElement` property refers to the object -representing the `` tag. It also provides the properties `head` and -`body`, which hold the objects for those elements. - -== Trees == - -(((nesting,of objects)))Think back to the ((syntax tree))s from -link:11_language.html#parsing[Chapter 11] for a moment. Their -structures are strikingly similar to the structure of a browser's -document. Each _((node))_ may refer to other nodes, _children_, which -in turn may have their own children. This shape is typical of nested -structures where elements can contain sub-elements that are similar to -themselves. - -(((documentElement property)))We call a data structure a _((tree))_ -when it has a branching structure, has no ((cycle))s (a node may not -contain itself, directly or indirectly), and has a single, -well-defined “((root))”. In the case of the ((DOM)), -`document.documentElement` serves as the root. - -(((sorting)))(((data structure)))(((syntax tree)))Trees come up a lot -in computer science. In addition to representing recursive structures such as -HTML documents or programs, they are often used to maintain -sorted ((set))s of data because elements can usually be found or -inserted more efficiently in a sorted tree than in a sorted flat -array. 
- -(((leaf node)))(((Egg language)))A typical tree has different kinds of -((node))s. The syntax tree for link:11_language.html#language[the Egg -language] had variables, values, and application nodes. Application -nodes always have children, whereas variables and values are _leaves_, or -nodes without children. - -(((body property)))The same goes for the DOM. Nodes for regular -_((element))s_, which represent ((HTML)) tags, determine the structure -of the document. These can have ((child node))s. An example of such a -node is `document.body`. Some of these children can be ((leaf node))s, -such as pieces of ((text)) or ((comment))s (comments are written between -`` in HTML). - -(((text node)))(((ELEMENT_NODE code)))(((COMMENT_NODE -code)))(((TEXT_NODE code)))(((nodeType property)))Each DOM node object -has a `nodeType` property, which contains a numeric code that -identifies the type of node. Regular elements have the value 1, which -is also defined as the constant property `document.ELEMENT_NODE`. Text -nodes, representing a section of text in the document, have the value -3 (`document.TEXT_NODE`). Comments have the value 8 -(`document.COMMENT_NODE`). - -So another way to visualize our document ((tree)) is as follows: - -image::img/html-tree.svg[alt="HTML document as a tree",width="8cm"] - -The leaves are text nodes, and the arrows indicate parent-child -relationships between nodes. - -[[standard]] -== The standard == - -(((programming language)))(((interface,design)))Using cryptic numeric -codes to represent node types is not a very JavaScript-like thing to -do. Later in this chapter, we'll see that other parts of the -((DOM)) interface also feel cumbersome and alien. The reason for this -is that the DOM wasn't designed for just JavaScript. Rather, it tries -to define a language-neutral ((interface)) that can be used in other -systems as well—not just HTML but also ((XML)), which is a generic -((data format)) with an HTML-like syntax. 
- -(((consistency)))(((integration)))This is unfortunate. Standards are -often useful. But in this case, the advantage (cross-language -consistency) isn't all that compelling. Having an interface that is -properly integrated with the language you are using will save you more -time than having a familiar interface across languages. - -(((array-like object)))(((NodeList type)))As an example of such poor -integration, consider the `childNodes` property that element nodes in -the DOM have. This property holds an array-like object, with a -`length` property and properties labeled by numbers to access the -child nodes. But it is an instance of the `NodeList` type, not a real -array, so it does not have methods such as `slice` and `forEach`. - -(((interface,design)))(((DOM,construction)))(((side effect)))Then -there are issues that are simply poor design. For example, there is no -way to create a new node and immediately add children or attributes to -it. Instead, you have to first create it, then add the children one by -one, and finally set the attributes one by one, using side effects. Code that -interacts heavily with the DOM tends to get long, repetitive, and -ugly. - -(((library)))But these flaws aren't fatal. Since JavaScript -allows us to create our own ((abstraction))s, it is easy to write some -((helper function))s that allow you to express the operations you are -performing in a clearer and shorter way. In fact, many libraries -intended for browser programming come with such tools. - -== Moving through the tree == - -(((pointer)))DOM nodes contain a wealth of ((link))s to other nearby -nodes. The following diagram illustrates these: - -image::img/html-links.svg[alt="Links between DOM nodes",width="6cm"] - -(((child node)))(((parentNode property)))(((childNodes -property)))Although the diagram shows only one link of each type, -every node has a `parentNode` property that points to its containing -node. 
Likewise, every element node (node type 1) has a `childNodes` -property that points to an ((array-like object)) holding its children. - -(((firstChild property)))(((lastChild property)))(((previousSibling -property)))(((nextSibling property)))In theory, you could move -anywhere in the tree using just these parent and child links. But -JavaScript also gives you access to a number of additional convenience -links. The `firstChild` and `lastChild` properties point to the first -and last child elements or have the value `null` for nodes without -children. Similarly, `previousSibling` and `nextSibling` point to -adjacent nodes, which are nodes with the same parent that appear immediately -before or after the node itself. For a first child, `previousSibling` -will be null, and for a last child, `nextSibling` will be null. - -(((talksAbout function)))(((recursion)))(((nesting,of objects)))When -dealing with a nested data structure like this one, recursive functions -are often useful. The following recursive function scans a document for ((text node))s -containing a given string and returns `true` when it has found one: - -[[talksAbout]] -[sandbox="homepage"] -[source,javascript] ----- -function talksAbout(node, string) { - if (node.nodeType == document.ELEMENT_NODE) { - for (var i = 0; i < node.childNodes.length; i++) { - if (talksAbout(node.childNodes[i], string)) - return true; - } - return false; - } else if (node.nodeType == document.TEXT_NODE) { - return node.nodeValue.indexOf(string) > -1; - } -} - -console.log(talksAbout(document.body, "book")); -// → true ----- - -(((nodeValue property)))The `nodeValue` property of a text node refers -to the string of text that it represents. - -== Finding elements == - -(((DOM)))(((body property)))(((hard-coding)))Navigating these -((link))s among parents, children, and siblings is often useful, as in -the previous function, which runs through the whole document. 
But if we -want to find a specific node in the document, reaching it by starting -at `document.body` and blindly following a hard-coded path of links is -a bad idea. Doing so bakes assumptions into our program about the -precise structure of the document—a structure we might want to change -later. Another complicating factor is that text nodes are created even -for the ((whitespace)) between nodes. The example document's body tag -does not have just three children (`

    ` and two `

    ` elements) but -actually has seven: those three, plus the spaces before, after, and -between them. - -(((searching)))(((href attribute)))(((getElementsByTagName method)))So -if we want to get the `href` attribute of the link in that document, -we don't want to say something like “Get the second child of the sixth -child of the document body”. It'd be better if we could say “Get the -first link in the document”. And we can. - -[sandbox="homepage"] -[source,javascript] ----- -var link = document.body.getElementsByTagName("a")[0]; -console.log(link.href); ----- - -(((child node)))All element nodes have a `getElementsByTagName` -method, which collects all elements with the given tag name that are -descendants (direct or indirect children) of the given node and -returns them as an array-like object. - -(((id attribute)))(((getElementById method)))To find a specific -_single_ node, you can give it an `id` attribute and use -`document.getElementById` instead. - -[source,text/html] ----- -

    My ostrich Gertrude:

    -

    - - ----- - -(((getElementsByClassName method)))(((class attribute)))A third, -similar method is `getElementsByClassName`, which, like -`getElementsByTagName`, searches through the contents of an element -node and retrieves all elements that have the given string in their -`class` attribute. - -== Changing the document == - -(((side effect)))(((removeChild method)))(((appendChild -method)))(((insertBefore method)))(((DOM,construction)))Almost -everything about the ((DOM)) data structure can be changed. Element -nodes have a number of methods that can be used to change their -content. The `removeChild` method removes the given child node from -the document. To add a child, we can use `appendChild`, which puts it -at the end of the list of children, or `insertBefore`, which inserts -the node given as the first argument before the node given as the second -argument. - -[source,text/html] ----- -

    One

    -

    Two

    -

    Three

    - - ----- - -A node can exist in the document in only one place. Thus, inserting -paragraph “Three” in front of paragraph “One” will first remove it -from the end of the document and then insert it at the front, -resulting in “Three/One/Two”. All operations that insert a node -somewhere will, as a ((side effect)), cause it to be removed from its -current position (if it has one). - -(((insertBefore method)))(((replaceChild method)))The `replaceChild` -method is used to replace a child node with another one. It takes as -arguments two nodes: a new node and the node to be replaced. The -replaced node must be a child of the element the method is called on. -Note that both `replaceChild` and `insertBefore` expect the _new_ node -as their first argument. - -== Creating nodes == - -(((alt attribute)))(((img (HTML tag))))In the following example, we -want to write a script that replaces all ((image))s (`` tags) in -the document with the text held in their `alt` attributes, which -specifies an alternative textual representation of the image. - -(((createTextNode method)))This involves not only removing the images -but adding a new text node to replace them. For this, we use the -`document.createTextNode` method. - -[source,text/html] ----- -

    The Cat in the - Hat.

    - -

    - - ----- - -(((text node)))Given a string, `createTextNode` gives us a type 3 DOM -node (a text node), which we can insert into the document to make it -show up on the screen. - -(((live data structure)))(((getElementsByTagName -method)))(((childNodes property)))The loop that goes over the images -starts at the end of the list of nodes. This is necessary because the -node list returned by a method like `getElementsByTagName` (or a -property like `childNodes`) is __live__. That is, it is updated as the -document changes. If we started from the front, removing the first -image would cause the list to lose its first element so that the -second time the loop repeats, where `i` is 1, it would stop because -the length of the collection is now also 1. - -(((slice method)))If you want a _solid_ collection of nodes, as -opposed to a live one, you can convert the collection to a real array -by calling the array `slice` method on it. - -[source,javascript] ----- -var arrayish = {0: "one", 1: "two", length: 2}; -var real = Array.prototype.slice.call(arrayish, 0); -real.forEach(function(elt) { console.log(elt); }); -// → one -// two ----- - -(((createElement method)))To create regular ((element)) nodes (type -1), you can use the `document.createElement` method. This method takes -a tag name and returns a new empty node of the given type. - -[[elt]] -(((Popper+++,+++ Karl)))(((DOM,construction)))(((elt function)))The -following example defines a utility `elt`, which creates an element -node and treats the rest of its arguments as children to that node. -This function is then used to add a simple attribution to a quote. - -[source,text/html] ----- -
    - No book can ever be finished. While working on it we learn - just enough to find it immature the moment we turn away - from it. -
    - - ----- - -ifdef::book_target[] - -This is what the resulting document looks like: - -image::img/blockquote.png[alt="A blockquote with attribution",width="8cm"] - -endif::book_target[] - -== Attributes == - -(((href attribute)))Some element ((attribute))s, such as `href` for -links, can be accessed through a ((property)) of the same name on the -element's ((DOM)) object. This is the case for a limited set of -commonly used standard attributes. - -(((data attribute)))(((getAttribute method)))(((setAttribute -method)))But HTML allows you to set any attribute you want on nodes. -This can be useful because it allows you to store extra information in a -document. If you make up your own attribute names, though, such -attributes will not be present as a property on the element's node. -Instead, you'll have to use the `getAttribute` and `setAttribute` -methods to work with them. - -[source,text/html] ----- -

    The launch code is 00000000.

    -

    I have two feet.

    - - ----- - -I recommend prefixing the names of such made-up attributes with -`data-` to ensure they do not conflict with any other -attributes. - -(((programming language)))(((syntax highlighting example)))As a simple -example, we'll write a “syntax highlighter” that looks for `<pre>`
    -tags (“preformatted”, used for code and similar plaintext) with a
    -`data-language` attribute and crudely tries to highlight the
    -((keyword))s for that language.
    -
    -// include_code
    -
    -[sandbox="highlight"]
    -[source,javascript]
    -----
    -function highlightCode(node, keywords) {
    -  var text = node.textContent;
    -  node.textContent = ""; // Clear the node
    -
    -  var match, pos = 0;
    -  while (match = keywords.exec(text)) {
    -    var before = text.slice(pos, match.index);
    -    node.appendChild(document.createTextNode(before));
    -    var strong = document.createElement("strong");
    -    strong.appendChild(document.createTextNode(match[0]));
    -    node.appendChild(strong);
    -    pos = keywords.lastIndex;
    -  }
    -  var after = text.slice(pos);
    -  node.appendChild(document.createTextNode(after));
    -}
    -----
    -
    -(((pre (HTML tag))))(((syntax highlighting example)))(((highlightCode
    -function)))The function `highlightCode` takes a `<pre>` node and a
    -((regular expression)) (with the “global” option turned on) that
    -matches the keywords of the programming language that the element
    -contains.
    -
    -(((strong (HTML tag))))(((clearing)))(((textContent property)))The
    -`textContent` property is used to get all the ((text)) in the node
    -and is then set to an empty string, which has the effect of emptying
    -the node. We loop over all matches of the keyword expression,
    -appending the text _between_ them as regular text nodes, and the text
    -matched (the keywords) as text nodes wrapped in `<strong>` (bold) elements.
    -
    -(((data attribute)))(((getElementsByTagName method)))We can
    -automatically highlight all programs on the page by looping over all
    -the `<pre>` elements that have a `data-language` attribute and
    -calling `highlightCode` on each one with the correct regular
    -expression for the language.
    -
    -// include_code
    -
    -[sandbox="highlight"]
    -[source,javascript]
    -----
    -var languages = {
    -  javascript: /\b(function|return|var)\b/g /* … etc */
    -};
    -
    -function highlightAllCode() {
    -  var pres = document.body.getElementsByTagName("pre");
    -  for (var i = 0; i < pres.length; i++) {
    -    var pre = pres[i];
    -    var lang = pre.getAttribute("data-language");
    -    if (languages.hasOwnProperty(lang))
    -      highlightCode(pre, languages[lang]);
    -  }
    -}
    -----
    -
    -(((syntax highlighting example)))Here is an example:
    -
    -[sandbox="highlight"]
    -[source,text/html]
    -----
    -

    Here it is, the identity function:

    -
    -function id(x) { return x; }
    -
    - - ----- - -ifdef::book_target[] - -This produces a page that looks like this: - -image::img/highlighted.png[alt="A highlighted piece of code",width="4.8cm"] - -endif::book_target[] - -(((getAttribute method)))(((setAttribute method)))(((className -property)))(((class attribute)))There is one commonly used attribute, -`class`, which is a ((reserved word)) in the JavaScript language. For -historical reasons—some old JavaScript implementations could not -handle property names that matched keywords or reserved words—the -property used to access this attribute is called `className`. You can -also access it under its real name, `"class"`, by using the -`getAttribute` and `setAttribute` methods. - -== Layout == - -(((layout)))(((block element)))(((inline element)))(((p (HTML -tag))))(((h1 (HTML tag))))(((a (HTML tag))))(((strong (HTML tag))))You -might have noticed that different types of elements are laid out -differently. Some, such as paragraphs (`

    `) or headings (`

    `), -take up the whole width of the document and are rendered on separate -lines. These are called _block_ elements. Others, such as links -(``) or the `` element used in the previous example, are -rendered on the same line with their surrounding text. Such elements -are called _inline_ elements. - -(((drawing)))For any given document, browsers are able to compute a -layout, which gives each element a size and position based on its -type and content. This layout is then used to actually draw the -document. - -(((border (CSS))))(((offsetWidth property)))(((offsetHeight -property)))(((clientWidth property)))(((clientHeight -property)))(((dimensions)))The size and position of an element can be -accessed from JavaScript. The `offsetWidth` and `offsetHeight` -properties give you the space the element takes up in _((pixel))s_. A -pixel is the basic unit of measurement in the browser and typically -corresponds to the smallest dot that your screen can display. -Similarly, `clientWidth` and `clientHeight` give you the size of the -space _inside_ the element, ignoring border width. - -[source,text/html] ----- -

    - I'm boxed in -

    - - ----- - -ifdef::book_target[] - -Giving a paragraph a border causes a rectangle to be drawn around it. - -image::img/boxed-in.png[alt="A paragraph with a border",width="8cm"] - -endif::book_target[] - - -[[boundingRect]] -(((getBoundingClientRect method)))(((position)))(((pageXOffset -property)))(((pageYOffset property)))The most effective way to find -the precise position of an element on the screen is the -`getBoundingClientRect` method. It returns an object with `top`, -`bottom`, `left`, and `right` properties, indicating the pixel -positions of the sides of the element relative to the top left of the -screen. If you want them relative to the whole document, you must -add the current scroll position, found under the global `pageXOffset` -and `pageYOffset` variables. - -(((offsetHeight property)))(((getBoundingClientRect -method)))(((drawing)))(((laziness)))(((performance)))(((efficiency)))Laying -out a document can be quite a lot of work. In the interest of speed, -browser engines do not immediately re-layout a document every time it -is changed but rather wait as long as they can. When a JavaScript -program that changed the document finishes running, the browser will -have to compute a new layout in order to display the changed document -on the screen. When a program _asks_ for the position or size of -something by reading properties such as `offsetHeight` or calling -`getBoundingClientRect`, providing correct information also requires -computing a ((layout)). - -(((side effect)))(((optimization)))(((benchmark)))A program that -repeatedly alternates between reading DOM layout information and -changing the DOM forces a lot of layouts to happen and will -consequently run really slowly. The following code shows an example of -this. It contains two different programs that build up a line of _X_ -characters 2,000 pixels wide and measures the time each one takes. - -// test: nonumbers - -[source,text/html] ----- -

    -

    - - ----- - -== Styling == - -(((block element)))(((inline element)))(((style)))(((strong (HTML -tag))))(((a (HTML tag))))(((underline)))We have seen that different -HTML elements display different behavior. Some are displayed as -blocks, others inline. Some add styling, such as `` making its -content ((bold)) and `
    ` making it blue and underlining it. - -(((img (HTML tag))))(((default behavior)))(((style attribute)))The way -an `` tag shows an image or an `` tag causes a link to be -followed when it is clicked is strongly tied to the element type. But -the default styling associated with an element, such as the text color -or underline, can be changed by us. Here is an example using the `style` -property: - -[source,text/html] ----- -

    Normal link

    -

    Green link

    ----- - -ifdef::book_target[] - -The second link will be green instead of the default link color. - -image::img/colored-links.png[alt="A normal and a green link",width="2.2cm"] - -endif::book_target[] - -(((border (CSS))))(((color (CSS))))(((CSS)))(((colon character)))A -style attribute may contain one or more _((declaration))s_, which are -a property (such as `color`) followed by a colon and a value (such as -`green`). When there is more than one declaration, they must be -separated by ((semicolon))s, as in `"color: red; border: none"`. - -(((display (CSS))))(((layout)))There are a lot of aspects that can be -influenced by styling. For example, the `display` property controls -whether an element is displayed as a block or an inline element. - -[source,text/html] ----- -This text is displayed inline, -as a block, and -not at all. ----- - -(((hidden element)))The `block` tag will end up on its own line since -((block element))s are not displayed inline with the text around them. -The last tag is not displayed at all—`display: none` prevents an -element from showing up on the screen. This is a way to hide elements. -It is often preferable to removing them from the document -entirely because it makes it easy to reveal them again at a later time. - -ifdef::book_target[] - -image::img/display.png[alt="Different display styles",width="4cm"] - -endif::book_target[] - -(((color (CSS))))(((style attribute)))JavaScript code can directly -manipulate the style of an element through the node's `style` -property. This property holds an object that has properties for all -possible style properties. The values of these properties are strings, -which we can write to in order to change a particular aspect of the -element's style. - -[source,text/html] ----- -

    - Pretty text -

    - - ----- - -(((camel case)))(((capitalization)))(((dash character)))(((font-family -(CSS))))Some style property names contain dashes, such as `font-family`. -Because such property names are awkward to work with in JavaScript -(you'd have to say `style["font-family"]`), the property names in the -`style` object for such properties have their dashes removed and the -letters that follow them capitalized (`style.fontFamily`). - -== Cascading styles == - -indexsee:[Cascading Style Sheets,CSS] -(((rule (CSS))))(((style (HTML tag))))The styling system for HTML is called ((CSS)) -for _Cascading Style Sheets_. A _((style sheet))_ is a set of -rules for how to style elements in a document. It can be given -inside a ` -

    Now strong text is italic and gray.

    ----- - -(((rule (CSS))))(((font-weight (CSS))))(((overlay)))The _((cascading))_ in the name -refers to the fact that multiple such rules are combined to -produce the final style for an element. In the previous example, the -default styling for `` tags, which gives them `font-weight: -bold`, is overlaid by the rule in the ` - - ----- -endif::interactive_target[] - -!!hint!! - -(((createElement method)))(((table example)))(((appendChild -method)))Use `document.createElement` to create new element nodes, -`document.createTextNode` to create text nodes, and the `appendChild` -method to put nodes into other nodes. - -You should loop over the key names once to fill in the top row and -then again for each object in the array to construct the data -rows. - -Don't forget to return the enclosing `` element at the end of -the function. - -!!hint!! - -=== Elements by tag name === - -(((getElementsByTagName method)))(((recursion)))The -`getElementsByTagName` method returns all child elements with a given -tag name. Implement your own version of it as a regular nonmethod -function that takes a node and a string (the tag name) as arguments -and returns an array containing all descendant element nodes with the -given tag name. - -(((tagName property)))(((capitalization)))(((toLowerCase -method)))(((toUpperCase method)))To find the tag name of an element, -use its `tagName` property. But note that this will return the tag -name in all uppercase. Use the `toLowerCase` or `toUpperCase` string -method to compensate for this. - -ifdef::interactive_target[] - -// test: no - -[source,text/html] ----- -

    Heading with a span element.

    -

    A paragraph with one, two - spans.

    - - ----- -endif::interactive_target[] - -!!hint!! - -(((getElementsByTagName method)))(((recursion)))The solution is most -easily expressed with a recursive function, similar to the -link:13_dom.html#talksAbout[`talksAbout` function] defined earlier in -this chapter. - -(((concatenation)))(((concat method)))(((closure)))You could call -`byTagname` itself recursively, concatenating the resulting arrays to -produce the output. For a more efficient approach, define an inner -function that calls itself recursively and that has access to an -array variable defined in the outer function to which it can add the -matching elements it finds. Don't forget to call the ((inner -function)) once from the outer function. - -(((nodeType property)))(((ELEMENT_NODE code)))The recursive function -must check the node type. Here we are interested only in node type 1 -(`document.ELEMENT_NODE`). For such nodes, we must loop over their -children and, for each child, see whether the child matches the query while also doing -a recursive call on it to inspect its own children. - -!!hint!! - -=== The cat's hat === - -(((cat's hat (exercise))))Extend the cat ((animation)) defined -link:13_dom.html#animation[earlier] so that both the cat and his hat -(``) orbit at opposite sides of the ellipse. - -Or make the hat circle around the cat. Or alter the animation in some -other interesting way. - -(((absolute positioning)))(((top (CSS))))(((left (CSS))))(((position -(CSS))))To make positioning multiple objects easier, it is probably a -good idea to switch to absolute positioning. This means that `top` and -`left` are counted relative to the top left of the document. To avoid -using negative coordinates, you can simply add a fixed number of -pixels to the position values. 
- -ifdef::interactive_target[] - -// test: no - -[source,text/html] ----- - - - - ----- - -endif::interactive_target[] diff --git a/14_event.txt b/14_event.txt deleted file mode 100644 index 9df5ec0cb..000000000 --- a/14_event.txt +++ /dev/null @@ -1,1127 +0,0 @@ -:chap_num: 14 -:prev_link: 13_dom -:next_link: 15_game - -= Handling Events = - -[chapterquote="true"] -[quote,Marcus Aurelius,Meditations] -____ -You have power over your mind—not -outside events. Realize this, and you will find strength. -____ - -(((stoicism)))(((Marcus Aurelius)))(((input)))(((timeline)))(((control -flow)))Some programs work with direct user input, such as mouse and -keyboard interaction. The timing and order of such input can't be -predicted in advance. This requires a different approach to control -flow than the one we have used so far. - -== Event handlers == - -(((polling)))(((button)))(((real-time)))Imagine an interface where the -only way to find out whether a key on the keyboard is being pressed is to read the -current state of that key. To be able to react to keypresses, -you would have to constantly read the key's state so that -you'd catch it before it's released again. It would be dangerous to -perform other time-intensive computations since you might miss a -keypress. - -That is how such input was handled on primitive machines. A step -up would be for the hardware or operating system to notice the -keypress and put it in a queue. A program can then periodically check the -queue for new events and react to what it finds there. - -(((responsiveness)))(((user experience)))Of course, it has to remember -to look at the queue, and to do it often, because any time between the -key being pressed and the program noticing the event will cause the -software to feel unresponsive. This approach is called _((polling))_. -Most programmers avoid it whenever possible. 
- -(((callback function)))(((event handling)))A better mechanism is for -the underlying system to give our code a chance to react -to events as they occur. Browsers do this by allowing us to register -functions as _handlers_ for specific events. - -[source,text/html] ----- -

    Click this document to activate the handler.

    - ----- - -(((click event)))(((addEventListener method)))The `addEventListener` -function registers its second argument to be called whenever the event -described by its first argument occurs. - -== Events and DOM nodes == - -(((addEventListener method)))(((event handling)))Each ((browser)) -event handler is registered in a context. When you call -`addEventListener` as shown previously, you are calling it as a method on the -whole ((window)) because in the browser the ((global scope)) is -equivalent to the `window` object. Every ((DOM)) element has its own -`addEventListener` method, which allows you to listen specifically on -that element. - -[source,text/html] ----- - -

    No handler here.

    - ----- - -(((click event)))(((button (HTML tag))))The example attaches a handler -to the button node. Thus, clicks on the button cause that handler to -run, whereas clicks on the rest of the document do not. - -(((onclick attribute)))(((encapsulation)))Giving a node an `onclick` -attribute has a similar effect. But a node has only one `onclick` -attribute, so you can register only one handler per node that way. The -`addEventListener` method allows you to add any number of handlers, so -you can't accidentally replace a handler that has already been -registered. - -(((removeEventListener method)))The `removeEventListener` method, -called with arguments similar to those given to `addEventListener`, removes a -handler. - -[source,text/html] ----- - - ----- - -(((function,as value)))To be able to unregister a handler function, we -give it a name (such as `once`) so that we -can pass it to both `addEventListener` and `removeEventListener`. - -== Event objects == - -(((which property)))(((event handling)))Though we have ignored it in -the previous examples, event handler functions are passed an argument: -the _((event object))_. This object gives us additional information -about the event. For example, if we want to know _which_ ((mouse -button)) was pressed, we can look at the event object's `which` property. - -[source,text/html] ----- - - ----- - -(((event type)))(((type property)))The information stored in an event -object differs per type of event. We'll discuss various types later -in this chapter. The object's `type` property always holds a string -identifying the event (for example `"click"` or `"mousedown"`). - -== Propagation == - -indexsee:[bubbling,event propagation] -indexsee:[propagation,event propagation] -(((event propagation)))(((parent node)))Event handlers registered on -nodes with children will also receive some events that happen in the -children. If a button inside a paragraph is clicked, event handlers on -the paragraph will also receive the click event. 
- -(((event handling)))But if both the paragraph and the button have a -handler, the more specific handler—the one on the button—gets to go -first. The event is said to _propagate_ outward, from the node where -it happened to that node's parent node and on to the root of the -document. Finally, after all handlers registered on a specific node -have had their turn, handlers registered on the whole ((window)) get a -chance to respond to the event. - -(((stopPropagation method)))(((click event)))At any point, an event -handler can call the `stopPropagation` method on the event object to -prevent handlers “further up” from receiving the event. This can be -useful when, for example, you have a button inside another clickable -element and you don't want clicks on the button to activate the outer -element's click behavior. - -(((mousedown event)))The following example registers `"mousedown"` -handlers on both a button and the paragraph around it. When clicked -with the right mouse button, the handler for the button calls -`stopPropagation`, which will prevent the handler on the paragraph -from running. When the button is clicked with another ((mouse -button)), both handlers will run. - -[source,text/html] ----- -

    A paragraph with a button.

    - ----- - -(((event propagation)))(((target property)))Most event objects have a -`target` property that refers to the node where they originated. You -can use this property to ensure that you're not accidentally handling -something that propagated up from a node you do not want to handle. - -It is also possible to use the `target` property to cast a wide net -for a specific type of event. For example, if you have a node -containing a long list of buttons, it may be more convenient to -register a single click handler on the outer node and have it use the -`target` property to figure out whether a button was clicked, rather -than register individual handlers on all of the buttons. - -[source,text/html] ----- - - - - ----- - -== Default actions == - -(((scrolling)))(((default behavior)))(((event handling)))Many events -have a default action associated with them. If you click a ((link)), -you will be taken to the link's target. If you press the down arrow, -the browser will scroll the page down. If you right-click, you'll get -a context menu. And so on. - -(((preventDefault method)))For most types of events, the JavaScript -event handlers are called _before_ the default behavior is performed. -If the handler doesn't want the normal behavior to happen, typically -because it has already taken care of handling the event, it can call -the `preventDefault` method on the event object. - -(((expectation)))This can be used to implement your own ((keyboard)) -shortcuts or ((context menu)). It can also be used to obnoxiously -interfere with the behavior that users expect. For example, here is a -link that cannot be followed: - -[source,text/html] ----- -MDN - ----- - -Try not to do such things unless you have a really good reason to. For -people using your page, it can be unpleasant when the behavior -they expect is broken. - -Depending on the browser, some events can't be intercepted. 
On -Chrome, for example, ((keyboard)) shortcuts to close the current tab -(Ctrl-W or Command-W) cannot be handled by JavaScript. - -== Key events == - -(((keyboard)))(((keydown event)))(((keyup event)))(((event -handling)))When a key on the keyboard is pressed, your browser fires a -`"keydown"` event. When it is released, a `"keyup"` event fires. - -[source,text/html] -[focus="yes"] ----- -

    This page turns violet when you hold the V key.

    - ----- - -(((repeating key)))Despite its name, `"keydown"` fires not only -when the key is physically pushed down. When a key is pressed and -held, the event fires again every time the key _repeats_. -Sometimes—for example if you want to increase the acceleration of a -((game)) character when an arrow key is pressed and decrease it again -when the key is released—you have to be careful not to increase it -again every time the key repeats or you'd end up with unintentionally -huge values. - -(((keyCode property)))(((key code)))The previous example looked at the -`keyCode` property of the event object. This is how you can identify -which key is being pressed or released. Unfortunately, it's not -always obvious how to translate the numeric key code to an actual -key. - -(((event object)))(((charCodeAt method)))For letter and number keys, -the associated key code will be the ((Unicode)) character code -associated with the (uppercase) letter or number printed on the key. -The `charCodeAt` method on ((string))s gives us a way to find this -code. - -[source,javascript] ----- -console.log("Violet".charCodeAt(0)); -// → 86 -console.log("1".charCodeAt(0)); -// → 49 ----- - -Other keys have less predictable ((key code))s. The best way to find -the codes you need is usually by ((experiment))ing—register a key event -handler that logs the key codes it gets and press the key you are -interested in. - -(((modifier key)))(((shift key)))(((control key)))(((alt key)))(((meta -key)))(((command key)))(((ctrlKey property)))(((shiftKey -property)))(((altKey property)))(((metaKey property)))Modifier keys -such as Shift, Ctrl, Alt, and Meta (Command on Mac) generate key -events just like normal keys. But when looking for key combinations, -you can also find out whether these keys are held down by looking -at the `shiftKey`, `ctrlKey`, `altKey`, and `metaKey` properties of -keyboard and mouse events. - -[source,text/html] -[focus="yes"] ----- -

    Press Ctrl-Space to continue.

    - ----- - -(((typing)))(((fromCharCode function)))(((charCode -property)))(((keydown event)))(((keyup event)))(((keypress event)))The -`"keydown"` and `"keyup"` events give you information about the -physical key that is being pressed. But what if you are interested in -the actual ((text)) being typed? Getting that text from key codes is -awkward. Instead, there exists another event, `"keypress"`, which -fires right after `"keydown"` (and repeated along with `"keydown"` -when the key is held) but only for keys that produce character input. -The `charCode` property in the event object contains a code that can -be interpreted as a ((Unicode)) character code. We can use the -`String.fromCharCode` function to turn this code into an -actual single-((character)) ((string)). - -[source,text/html] -[focus="yes"] ----- -

    Focus this page and type something.

    - ----- - -(((button (HTML tag))))(((tabindex attribute))) The ((DOM)) node where -a key event originates depends on the element that has ((focus)) when -the key is pressed. Normal nodes cannot have focus (unless you give -them a `tabindex` attribute), but things such as ((link))s, buttons, and -form fields can. We'll come back to form ((field))s in -link:18_forms.html#forms[Chapter 18]. When nothing in particular has -focus, `document.body` acts as the target node of key events. - -== Mouse clicks == - -(((mousedown event)))(((mouseup event)))(((mouse cursor)))Pressing a -((mouse button)) also causes a number of events to fire. The -`"mousedown"` and `"mouseup"` events are similar to `"keydown"` and -`"keyup"` and fire when the button is pressed and released. -These will happen on the DOM nodes that are immediately below the -mouse pointer when the event occurs. - -(((click event)))After the `"mouseup"` event, a `"click"` event -fires on the most specific node that contained both the press and the -release of the button. For example, if I press down the mouse button -on one paragraph and then move the pointer to another paragraph and -release the button, the `"click"` event will happen on the element -that contains both those paragraphs. - -(((dblclick event)))(((double click)))If two clicks happen close -together, a `"dblclick"` (double-click) event also fires, after the -second click event. - -(((pixel)))(((pageX property)))(((pageY property)))(((event -object)))To get precise information about the place where a mouse -event happened, you can look at its `pageX` and `pageY` properties, -which contain the event's ((coordinates)) (in pixels) relative to the -top-left corner of the document. - -[[mouse_drawing]] -(((border-radius (CSS))))(((absolute positioning)))(((drawing program -example)))The following implements a primitive drawing program. Every -time you click the document, it adds a dot under your mouse -pointer. 
See link:19_paint.html#paint[Chapter 19] for a less primitive -drawing program. - -[source,text/html] ----- - - ----- - -(((clientX property)))(((clientY property)))(((getBoundingClientRect -method)))(((event object)))The `clientX` and `clientY` properties are -similar to `pageX` and `pageY` but relative to the part of the -document that is currently scrolled into view. These can be useful -when comparing mouse coordinates with the ((coordinates)) returned by -`getBoundingClientRect`, which also returns ((viewport))-relative -coordinates. - -== Mouse motion == - -(((mousemove event)))Every time the mouse pointer moves, a -`"mousemove"` event fires. This event can be used to track the -position of the mouse. A common situation in which this is useful is -when implementing some form of mouse-((dragging)) functionality. - -(((draggable bar example)))As an example, the following program displays a -bar and sets up event handlers so that dragging to the left or right -on this bar makes it narrower or wider: - -[source,text/html] ----- -

    Drag the bar to change its width:

    -
    -
    - ----- - -ifdef::book_target[] - -The resulting page looks like this: - -image::img/drag-bar.png[alt="A draggable bar",width="5.3cm"] - -endif::book_target[] - -(((mouseup event)))(((mousemove event)))Note that the `"mousemove"` -handler is registered on the whole ((window)). Even if the mouse goes -outside of the bar during resizing, we still want to update its size -and stop dragging when the mouse is released. - -(((buttons property)))(((which property)))We must stop resizing the -bar when the mouse button is released. Unfortunately, not all browsers -give `"mousemove"` events a meaningful `which` property. There is a -standard property called `buttons`, which provides similar -information, but that is also not supported on all browsers. -Fortunately, all major browsers support either `buttons` or `which`, -so the `buttonPressed` function in the example first tries `buttons`, -and falls back to `which` when that isn't available. - -(((mouseover event)))(((mouseout event)))Whenever the mouse pointer -enters or leaves a node, a `"mouseover"` or `"mouseout"` event -fires. These two events can be used, among other things, to create -((hover effect))s, showing or styling something when the mouse is over -a given element. - -(((event propagation)))Unfortunately, creating such an effect is not -as simple as starting the effect on `"mouseover"` and ending it on -`"mouseout"`. When the mouse moves from a node onto one of its -children, `"mouseout"` fires on the parent node, though the mouse -did not actually leave the node's extent. To make things worse, these -events propagate just like other events, and thus you will also -receive `"mouseout"` events when the mouse leaves one of the ((child -node))s of the node on which the handler is registered. - -(((isInside function)))(((relatedTarget property)))(((target -property)))To work around this problem, we can use the `relatedTarget` -property of the event objects created for these events. 
It tells us, -in the case of `"mouseover"`, what element the pointer was over -before and, in the case of `"mouseout"`, what element it is going to. -We want to change our hover effect only when the `relatedTarget` is -outside of our target node. Only in that case does this event actually -represent a _crossing over_ from outside to inside the node (or the -other way around). - -[source,text/html] ----- -

    Hover over this paragraph.

    - ----- - -The `isInside` function follows the given node's parent links until it -either reaches the top of the document (when `node` becomes null) or -finds the parent we are looking for. - -I should add that a ((hover effect)) like this can be much more easily -achieved using the ((CSS)) _((pseudoselector))_ `:hover`, as the next -example shows. But when your hover effect involves doing something -more complicated than changing a style on the target node, you must use the trick -with `"mouseover"` and `"mouseout"` events. - -[source,text/html] ----- - -

    Hover over this paragraph.

    ----- - -== Scroll events == - -(((scrolling)))(((scroll event)))(((event handling)))Whenever an -element is scrolled, a `"scroll"` event fires on it. This has -various uses, such as knowing what the user is currently looking at -(for disabling off-screen ((animation))s or sending ((spy)) reports to -your evil headquarters) or showing some indication of progress (by -highlighting part of a table of contents or showing a page number). - -The following example draws a ((progress bar)) in the top-right corner of -the document and updates it to fill up as you scroll down: - -[source,text/html] ----- - -
    -

    Scroll me...

    - ----- - -(((unit (CSS))))(((scrolling)))(((position (CSS))))(((fixed -positioning)))(((absolute positioning)))(((percent)))Giving an element -a `position` of `fixed` acts much like an `absolute` position but -also prevents it from scrolling along with the rest of the document. -The effect is to make our progress bar stay in its corner. Inside -it is another element, which is resized to indicate the current -progress. We use `%`, rather than `px`, as a unit when setting the -width so that the element is sized relative to the whole bar. - -(((innerHeight property)))(((innerWidth property)))(((pageYOffset -property)))The global `innerHeight` variable gives us the height of -the window, which we have to subtract from the total scrollable -height—you can't keep scrolling when you hit the bottom of the -document. (There's also an `innerWidth` to go along with -`innerHeight`.) By dividing `pageYOffset`, the current scroll -position, by the maximum scroll position and multiplying by 100, -we get the percentage for the progress bar. - -(((preventDefault method)))Calling `preventDefault` on a scroll event -does not prevent the scrolling from happening. In fact, the event -handler is called only _after_ the scrolling takes place. - -== Focus events == - -(((event handling)))(((focus event)))(((blur event)))When an element -gains ((focus)), the browser fires a `"focus"` event on it. When it -loses focus, a `"blur"` event fires. - -(((event propagation)))Unlike the events discussed earlier, these two -events do not propagate. A handler on a parent element is not notified -when a child element gains or loses focus. - -(((input (HTML tag))))(((help text example)))The following example -displays help text for the ((text field)) that currently has -focus: - -[source,text/html] ----- -

    Name:

    -

    Age:

    -

    - - ----- - -ifdef::book_target[] - -In the following screenshot, the help text for the age field is shown. - -image::img/help-field.png[alt="Providing help when a field is focused",width="4.4cm"] - -endif::book_target[] - -(((focus event)))(((blur event)))The ((window)) object will receive -`"focus"` and `"blur"` events when the user moves from or to the -browser tab or window in which the document is shown. - -== Load event == - -(((script (HTML tag))))(((load event)))When a page finishes loading, -the `"load"` event fires on the window and the document body -objects. This is often used to schedule ((initialization)) actions -that require the whole ((document)) to have been built. Remember that -the content of ` ----- - -(((clearTimeout function)))Sometimes you need to cancel a function you -have scheduled. This is done by storing the value returned by -`setTimeout` and calling `clearTimeout` on it. - -[source,javascript] ----- -var bombTimer = setTimeout(function() { - console.log("BOOM!"); -}, 500); - -if (Math.random() < 0.5) { // 50% chance - console.log("Defused."); - clearTimeout(bombTimer); -} ----- - -(((cancelAnimationFrame function)))(((requestAnimationFrame -function)))The `cancelAnimationFrame` function works in the same way -as ++clearTimeout++—calling it on a value returned by -`requestAnimationFrame` will cancel that frame (assuming it hasn't -already been called). - -(((setInterval function)))(((clearInterval -function)))(((repetition)))A similar set of functions, `setInterval` -and `clearInterval` are used to set timers that should repeat every _X_ -milliseconds. 
- -[source,javascript] ----- -var ticks = 0; -var clock = setInterval(function() { - console.log("tick", ticks++); - if (ticks == 10) { - clearInterval(clock); - console.log("stop."); - } -}, 200); ----- - -== Debouncing == - -(((optimization)))(((mousemove event)))(((scroll -event)))(((blocking)))Some types of events have the potential to fire -rapidly, many times in a row (the `"mousemove"` and `"scroll"` events, -for example). When handling such events, you must be careful not to do -anything too time-consuming or your handler will take up so much time -that interaction with the document starts to feel slow and choppy. - -(((setTimeout function)))If you do need to do something nontrivial in -such a handler, you can use `setTimeout` to make sure you are not -doing it too often. This is usually called _((debouncing))_ the event. -There are several slightly different approaches to this. - -(((textarea (HTML tag))))(((clearTimeout function)))(((keydown -event)))In the first example, we want to do something when the user -has typed something, but we don't want to do it immediately for every -key event. When they are ((typing)) quickly, we just want to wait -until a pause occurs. Instead of immediately performing an action in -the event handler, we set a timeout instead. We also clear the -previous timeout (if any) so that when events occur close together -(closer than our timeout delay), the timeout from the previous event -will be canceled. - -[source,text/html] ----- - - ----- - -(((sloppy programming)))Giving an undefined value to `clearTimeout` or -calling it on a timeout that has already fired has no effect. Thus, we -don't have to be careful about when to call it, and we simply do so -for every event. - -(((mousemove event)))We can use a slightly different pattern if we -want to space responses so that they're separated by at least a -certain length of ((time)) but want to fire them _during_ a series of -events, not just afterward. 
For example, we might want to respond to -`"mousemove"` events by showing the current coordinates of the mouse, -but only every 250 milliseconds. - -[source,text/html] ----- - ----- - -== Summary == - -Event handlers make it possible to detect and react to events we have -no direct control over. The `addEventListener` method is used to -register such a handler. - -Each event has a type (`"keydown"`, `"focus"`, and so on) that identifies -it. Most events are called on a specific DOM element and then -_propagate_ to that element's ancestors, allowing handlers associated -with those elements to handle them. - -When an event handler is called, it is passed an event object with -additional information about the event. This object also has methods -that allow us to stop further propagation (`stopPropagation`) and -prevent the browser's default handling of the event -(`preventDefault`). - -Pressing a key fires `"keydown"`, `"keypress"`, and `"keyup"` events. -Pressing a mouse button fires `"mousedown"`, `"mouseup"`, and -`"click"` events. Moving the mouse fires `"mousemove"` and possibly -`"mouseover"` and `"mouseout"` events. - -Scrolling can be detected with the `"scroll"` event, and focus changes -can be detected with the `"focus"` and `"blur"` events. When the document finishes -loading, a `"load"` event fires on the window. - -Only one piece of JavaScript program can run at a time. Thus, event -handlers and other scheduled scripts have to wait until other scripts -finish before they get their turn. - -== Exercises == - -=== Censored keyboard === - -(((Turkish)))(((Kurds)))(((censored keyboard (exercise))))Between 1928 -and 2013, Turkish law forbade the use of the letters _Q_, _W_, and _X_ -in official documents. This was part of a wider initiative to stifle -Kurdish culture—those letters occur in the language used by Kurdish -people but not in Istanbul Turkish. 
- -(((typing)))(((input (HTML tag))))As an exercise in doing ridiculous -things with technology, I'm asking you to program a ((text field)) (an -`` tag) that these letters cannot be typed into. - -(((clipboard)))(Do not worry about copy and paste and other such -loopholes.) - -ifdef::interactive_target[] - -// test: no - -[source,text/html] ----- - - ----- - -endif::interactive_target[] - -!!hint!! - -(((keypress event)))(((keydown event)))(((preventDefault -method)))(((censored keyboard (exercise))))The solution to this -exercise involves preventing the ((default behavior)) of key events. -You can handle either `"keypress"` or `"keydown"`. If either of them -has `preventDefault` called on it, the letter will not appear. - -(((keyCode property)))(((charCode -property)))(((capitalization)))Identifying the letter typed requires -looking at the `keyCode` or `charCode` property and comparing that -with the codes for the letters you want to filter. In `"keydown"`, you -do not have to worry about lowercase and uppercase letters, since it -identifies only the key pressed. If you decide to handle `"keypress"` -instead, which identifies the actual character typed, you have to make -sure you test for both cases. One way to do that would be this: - ----- -/[qwx]/i.test(String.fromCharCode(event.charCode)) ----- - -!!hint!! - -=== Mouse trail === - -(((animation)))(((mouse trail (exercise))))In JavaScript's early days, -which was the high time of ((gaudy home pages)) with lots of animated -images, people came up with some truly inspiring ways to use the -language. - -One of these was the “mouse trail”—a series of images that would -follow the mouse pointer as you moved it across the page. - -(((absolute positioning)))(((background (CSS))))In this exercise, I -want you to implement a mouse trail. Use absolutely positioned `
    ` -elements with a fixed size and background color (refer to the -link:14_event.html#mouse_drawing[code] in the “Mouse Clicks” -section for an example). Create a bunch of such elements and, when the -mouse moves, display them in the wake of the mouse pointer. - -(((mousemove event)))There are various possible approaches here. You -can make your solution as simple or as complex as you want. A simple -solution to start with is to keep a fixed number of trail elements and -cycle through them, moving the next one to the mouse's current -position every time a `"mousemove"` event occurs. - -ifdef::interactive_target[] - -// test: no - -[source,text/html] ----- - - - ----- - -endif::interactive_target[] - -!!hint!! - -(((mouse trail (exercise))))Creating the elements is best done in a -loop. Append them to the document to make them show up. To be -able to access them later to change their position, store the trail -elements in an array. - -(((mousemove event)))(((array,indexing)))(((remainder operator)))(((% -operator)))Cycling through them can be done by keeping a ((counter -variable)) and adding 1 to it every time the `"mousemove"` event -fires. The remainder operator (`% 10`) can then be used to get a valid -array index to pick the element you want to position during a given -event. - -(((simulation)))(((requestAnimationFrame function)))Another -interesting effect can be achieved by modeling a simple ((physics)) -system. Use the `"mousemove"` event only to update a pair of variables -that track the mouse position. Then use `requestAnimationFrame` to -simulate the trailing elements being attracted to the position of the -mouse pointer. At every animation step, update their position based on -their position relative to the pointer (and, optionally, a speed that -is stored for each element). Figuring out a good way to do this is up -to you. - -!!hint!! - -=== Tabs === - -(((tabbed interface (exercise))))A tabbed interface is a common design -pattern. 
It allows you to select an interface panel by choosing from -a number of tabs “sticking out” above an element. - -(((button (HTML tag))))(((display (CSS))))(((hidden element)))(((data -attribute)))In this exercise you'll implement a simple tabbed -interface. Write a function, `asTabs`, that takes a DOM node and -creates a tabbed interface showing the child elements of that node. It -should insert a list of `
    ` element. This nicely corresponds to -the structure of the `grid` property in the level—each row of the grid -is turned into a table row (`` element). The strings in the grid -are used as class names for the table cell (`
    `) elements. The -following CSS helps the resulting table look like the background we -want: - -[source,text/css] ----- -.background { background: rgb(52, 166, 251); - table-layout: fixed; - border-spacing: 0; } -.background td { padding: 0; } -.lava { background: rgb(255, 100, 100); } -.wall { background: white; } ----- - -(((padding (CSS))))Some of these (`table-layout`, `border-spacing`, -and `padding`) are simply used to suppress unwanted default behavior. -We don't want the layout of the ((table)) to depend upon the contents -of its cells, and we don't want space between the ((table)) cells or -padding inside them. - -(((background (CSS))))(((rgb (CSS))))(((CSS)))The `background` rule -sets the background color. CSS allows colors to be specified both as -words (`white`) and with a format such as `rgb(R, G, B)`, where the red, -green, and blue components of the color are separated into three -numbers from 0 to 255. So, in `rgb(52, 166, 251)`, the red component is -52, green is 166, and blue is 251. Since the blue component is the -largest, the resulting color will be bluish. You can see that in the -`.lava` rule, the first number (red) is the largest. - -We draw each ((actor)) by creating a ((DOM)) element for it and -setting that element's position and size based on the actor's properties. The -values have to be multiplied by `scale` to go from game units to -pixels. - -// include_code - -[source,javascript] ----- -DOMDisplay.prototype.drawActors = function() { - var wrap = elt("div"); - this.level.actors.forEach(function(actor) { - var rect = wrap.appendChild(elt("div", - "actor " + actor.type)); - rect.style.width = actor.size.x * scale + "px"; - rect.style.height = actor.size.y * scale + "px"; - rect.style.left = actor.pos.x * scale + "px"; - rect.style.top = actor.pos.y * scale + "px"; - }); - return wrap; -}; ----- - -(((position (CSS))))(((class attribute)))To give an element more than one -class, we separate the class names by spaces. 
In the -((CSS)) code shown next, the `actor` class gives the actors their -absolute position. Their type name is used as an extra class to give -them a color. We don't have to define the `lava` class again because we reuse -the class for the lava grid squares which we defined earlier. - -[source,text/css] ----- -.actor { position: absolute; } -.coin { background: rgb(241, 229, 89); } -.player { background: rgb(64, 64, 64); } ----- - -(((graphics)))(((optimization)))(((efficiency)))When it updates the -display, the `drawFrame` method first removes the old actor graphics, -if any, and then redraws them in their new positions. It may be -tempting to try to reuse the ((DOM)) elements for actors, but to make -that work, we would need a lot of additional information flow between -the display code and the simulation code. We'd need to associate -actors with DOM elements, and the ((drawing)) code must remove -elements when their actors vanish. Since there will typically be only -a handful of actors in the game, redrawing all of them is not -expensive. - -// include_code - -[source,javascript] ----- -DOMDisplay.prototype.drawFrame = function() { - if (this.actorLayer) - this.wrap.removeChild(this.actorLayer); - this.actorLayer = this.wrap.appendChild(this.drawActors()); - this.wrap.className = "game " + (this.level.status || ""); - this.scrollPlayerIntoView(); -}; ----- - -(((level)))(((class attribute)))(((style sheet)))By adding the level's -current status as a class name to the wrapper, we can style the player -actor slightly differently when the game is won or lost by adding a -((CSS)) rule that takes effect only when the player has an ((ancestor -element)) with a given class. - -[source,text/css] ----- -.lost .player { - background: rgb(160, 64, 64); -} -.won .player { - box-shadow: -4px -7px 8px white, 4px -7px 8px white; -} ----- - -(((player)))(((box shadow (CSS))))After touching ((lava)), the -player's color turns dark red, suggesting scorching. 
When the last -coin has been collected, we use two blurred white box shadows, one to the top -left and one to the top right, to create a white halo effect. - -[[viewport]] - -(((position (CSS))))(((max-width (CSS))))(((overflow -(CSS))))(((max-height (CSS))))(((viewport)))We can't assume that -levels always fit in the viewport. That is why the -`scrollPlayerIntoView` call is needed—it ensures that if the level is -protruding outside the viewport, we scroll that viewport to make -sure the player is near its center. The following ((CSS)) gives the -game's wrapping ((DOM)) element a maximum size and ensures that -anything that sticks out of the element's box is not visible. We also give the outer element a relative -position so that the actors inside it are positioned relative to -the level's top-left corner. - -[source,text/css] ----- -.game { - overflow: hidden; - max-width: 600px; - max-height: 450px; - position: relative; -} ----- - -(((scrolling)))In the `scrollPlayerIntoView` method, we find the -player's position and update the wrapping element's scroll position. -We change the scroll position by manipulating that element's `scrollLeft` -and `scrollTop` properties when the player is too close to the edge. 
- -// include_code - -[source,javascript] ----- -DOMDisplay.prototype.scrollPlayerIntoView = function() { - var width = this.wrap.clientWidth; - var height = this.wrap.clientHeight; - var margin = width / 3; - - // The viewport - var left = this.wrap.scrollLeft, right = left + width; - var top = this.wrap.scrollTop, bottom = top + height; - - var player = this.level.player; - var center = player.pos.plus(player.size.times(0.5)) - .times(scale); - - if (center.x < left + margin) - this.wrap.scrollLeft = center.x - margin; - else if (center.x > right - margin) - this.wrap.scrollLeft = center.x + margin - width; - if (center.y < top + margin) - this.wrap.scrollTop = center.y - margin; - else if (center.y > bottom - margin) - this.wrap.scrollTop = center.y + margin - height; -}; ----- - -(((center)))(((coordinates)))(((readability)))The way the player's -center is found shows how the methods on our `Vector` type allow -computations with objects to be written in a readable way. To -find the actor's center, we add its position (its top-left corner) and -half its size. That is the center in level coordinates, but we need it -in pixel coordinates, so we then multiply the resulting vector by our -display scale. - -(((validation)))Next, a series of checks verify that the player -position isn't outside of the allowed range. Note that sometimes this -will set nonsense scroll coordinates, below zero or beyond the -element's scrollable area. This is okay—the DOM will constrain them to -sane values. Setting `scrollLeft` to -10 will cause it to become 0. - -It would have been slightly simpler to always try to scroll the player -to the center of the ((viewport)). But this creates a rather jarring -effect. As you are jumping, the view will constantly shift up and -down. It is more pleasant to have a “neutral” area in the middle of -the screen where you can move around without causing any scrolling. 
- -(((cleaning up)))Finally, we'll need a way to clear a displayed level, -to be used when the game moves to the next level or resets a level. - -// include_code - -[source,javascript] ----- -DOMDisplay.prototype.clear = function() { - this.wrap.parentNode.removeChild(this.wrap); -}; ----- - -(((game,screenshot)))We are now able to display our tiny level. - -[source,text/html] ----- - - - ----- - -ifdef::book_target[] - -image::img/game_simpleLevel.png[alt="Our level rendered",width="7cm"] - -endif::book_target[] - -(((link (HTML tag))))(((style sheet)))(((CSS)))The `<link>` tag, when used -with `rel="stylesheet"`, is a way to load a CSS file into a page. The -file `game.css` contains the styles necessary for our game. - -== Motion and collision == - -(((physics)))(((animation)))Now we're at the point where we can start -adding motion—the most interesting aspect of the game. The basic -approach, taken by most games like this, is to split ((time)) into -small steps and, for each step, move the actors by a distance -corresponding to their speed (distance moved per second) multiplied by -the size of the time step (in seconds). - -(((obstacle)))(((collision detection)))That is easy. The difficult -part is dealing with the interactions between the elements. When the -player hits a wall or floor, they should not simply move through it. -The game must notice when a given motion causes an object to hit -another object and respond accordingly. For walls, the motion must be -stopped. For coins, the coin must be collected, and so on. - -Solving this for the general case is a big task. You can find -libraries, usually called _((physics engine))s_, that simulate -interaction between physical objects in two or three ((dimensions)). -We'll take a more modest approach in this chapter, handling only -collisions between rectangular objects and handling them in a rather simplistic -way.
- -(((bouncing)))(((collision detection)))(((animation)))Before moving -the ((player)) or a block of ((lava)), we test whether the motion -would take it inside of a nonempty part of the ((background)). If it -does, we simply cancel the motion altogether. The response to such a -collision depends on the type of actor—the player will stop, whereas a -lava block will bounce back. - -(((discretization)))This approach requires our ((time)) steps to be -rather small since it will cause motion to stop before the objects -actually touch. If the time steps (and thus the motion steps) are too -big, the player would end up hovering a noticeable distance above the -ground. Another approach, arguably better but more complicated, would -be to find the exact collision spot and move there. We will take the -simple approach and hide its problems by ensuring the animation -proceeds in small steps. - -(((obstacle)))(((obstacleAt method)))(((collision detection)))This -method tells us whether a ((rectangle)) (specified by a position and a -size) overlaps with any nonempty space on the background grid: - -// include_code - -[source,javascript] ----- -Level.prototype.obstacleAt = function(pos, size) { - var xStart = Math.floor(pos.x); - var xEnd = Math.ceil(pos.x + size.x); - var yStart = Math.floor(pos.y); - var yEnd = Math.ceil(pos.y + size.y); - - if (xStart < 0 || xEnd > this.width || yStart < 0) - return "wall"; - if (yEnd > this.height) - return "lava"; - for (var y = yStart; y < yEnd; y++) { - for (var x = xStart; x < xEnd; x++) { - var fieldType = this.grid[y][x]; - if (fieldType) return fieldType; - } - } -}; ----- - -(((Math.floor function)))(((Math.ceil function)))This method computes the set -of grid squares that the body ((overlap))s with by using `Math.floor` -and `Math.ceil` on the body's ((coordinates)). Remember that ((grid)) squares -are 1×1 units in size. By ((rounding)) the sides of a box up and -down, we get the range of ((background)) squares that the box touches. 
- -image::img/game-grid.svg[alt="Finding collisions on a grid",width="3cm"] - -If the body sticks out of the level, we always return `"wall"` for the -sides and top and `"lava"` for the bottom. This ensures that the -player dies when falling out of the world. When the body is fully -inside the grid, we loop over the block of ((grid)) squares found by -((rounding)) the ((coordinates)) and return the content of the first -nonempty square we find. - -(((coin)))(((lava)))(((collision detection)))Collisions between the -((player)) and other dynamic ((actor))s (coins, moving lava) are -handled _after_ the player moved. When the motion has taken the player -into another actor, the appropriate effect—collecting a coin or -dying—is activated. - -(((actorAt method)))This method scans the array of actors, -looking for an actor that overlaps the one given as an argument: - -// include_code - -[source,javascript] ----- -Level.prototype.actorAt = function(actor) { - for (var i = 0; i < this.actors.length; i++) { - var other = this.actors[i]; - if (other != actor && - actor.pos.x + actor.size.x > other.pos.x && - actor.pos.x < other.pos.x + other.size.x && - actor.pos.y + actor.size.y > other.pos.y && - actor.pos.y < other.pos.y + other.size.y) - return other; - } -}; ----- - -[[actors]] -== Actors and actions == - -(((animate method)))(((animation)))(((keyboard)))The `animate` method -on the `Level` type gives all actors in the level a chance to move. -Its `step` argument is the ((time)) step in seconds. The `keys` object -contains information about the arrow keys the player has pressed. 
- -// include_code - -[source,javascript] ----- -var maxStep = 0.05; - -Level.prototype.animate = function(step, keys) { - if (this.status != null) - this.finishDelay -= step; - - while (step > 0) { - var thisStep = Math.min(step, maxStep); - this.actors.forEach(function(actor) { - actor.act(thisStep, this, keys); - }, this); - step -= thisStep; - } -}; ----- - -(((level)))(((animation)))When the level's `status` property has a -non-null value (which is the case when the player has won or lost), we -must count down the `finishDelay` property, which tracks the time -between the point where winning or losing happens and the point where -we want to stop showing the level. - -(((while loop)))(((discretization)))The `while` loop cuts the time -step we are animating into suitably small pieces. It ensures that no -step larger than `maxStep` is taken. For example, a `step` of 0.12 -second would be cut into two steps of 0.05 seconds and one step of 0.02. - -(((actor)))(((Lava type)))(((lava)))Actor objects have an `act` -method, which takes as arguments the time step, the level object, and -the `keys` object. Here is one, for the `Lava` actor type, -which ignores the `keys` object: - -// include_code - -[source,javascript] ----- -Lava.prototype.act = function(step, level) { - var newPos = this.pos.plus(this.speed.times(step)); - if (!level.obstacleAt(newPos, this.size)) - this.pos = newPos; - else if (this.repeatPos) - this.pos = this.repeatPos; - else - this.speed = this.speed.times(-1); -}; ----- - -(((bouncing)))(((multiplication)))(((Vector type)))(((collision -detection)))It computes a new position by adding the product of the -((time)) step and its current speed to its old position. If no -obstacle blocks that new position, it moves there. If there is an -obstacle, the behavior depends on the type of the ((lava)) -block—dripping lava has a `repeatPos` property, to which it jumps back -when it hits something. 
Bouncing lava simply inverts its speed -(multiplies it by -1) in order to start moving in the other direction. - -(((Coin type)))(((coin)))(((wave)))Coins use their `act` method to -wobble. They ignore collisions since they are simply wobbling around -inside of their own square, and collisions with the ((player)) will be -handled by the _player_'s `act` method. - -// include_code - -[source,javascript] ----- -var wobbleSpeed = 8, wobbleDist = 0.07; - -Coin.prototype.act = function(step) { - this.wobble += step * wobbleSpeed; - var wobblePos = Math.sin(this.wobble) * wobbleDist; - this.pos = this.basePos.plus(new Vector(0, wobblePos)); -}; ----- - -(((Math.sin function)))(((sine)))(((phase)))The `wobble` property is -updated to track time and then used as an argument to `Math.sin` to -create a ((wave)), which is used to compute a new position. - -(((collision detection)))(((Player type)))That leaves the ((player)) -itself. Player motion is handled separately per ((axis)) because -hitting the floor should not prevent horizontal motion, and hitting a -wall should not stop falling or jumping motion. This method implements -the horizontal part: - -// include_code - -[source,javascript] ----- -var playerXSpeed = 7; - -Player.prototype.moveX = function(step, level, keys) { - this.speed.x = 0; - if (keys.left) this.speed.x -= playerXSpeed; - if (keys.right) this.speed.x += playerXSpeed; - - var motion = new Vector(this.speed.x * step, 0); - var newPos = this.pos.plus(motion); - var obstacle = level.obstacleAt(newPos, this.size); - if (obstacle) - level.playerTouched(obstacle); - else - this.pos = newPos; -}; ----- - -(((animation)))(((keyboard)))The horizontal motion is computed based on the state -of the left and right arrow keys. When a motion causes the player to -hit something, the level's `playerTouched` method, which handles -things like dying in ((lava)) and collecting ((coin))s, is called. -Otherwise, the object updates its position. 
- -Vertical motion works in a similar way but has to simulate -((jumping)) and ((gravity)). - -// include_code - -[source,javascript] ----- -var gravity = 30; -var jumpSpeed = 17; - -Player.prototype.moveY = function(step, level, keys) { - this.speed.y += step * gravity; - var motion = new Vector(0, this.speed.y * step); - var newPos = this.pos.plus(motion); - var obstacle = level.obstacleAt(newPos, this.size); - if (obstacle) { - level.playerTouched(obstacle); - if (keys.up && this.speed.y > 0) - this.speed.y = -jumpSpeed; - else - this.speed.y = 0; - } else { - this.pos = newPos; - } -}; ----- - -(((acceleration)))(((physics)))At the start of the method, the player -is accelerated vertically to account for ((gravity)). The gravity, -((jumping)) speed, and pretty much all other ((constant))s in this -game have been set by ((trial and error)). I tested various values -until I found a combination I liked. - -(((collision detection)))(((keyboard)))(((jumping)))Next, we check for -obstacles again. If we hit an obstacle, there are two possible -outcomes. When the up arrow is pressed _and_ we are moving down -(meaning the thing we hit is below us), the speed is set to a -relatively large, negative value. This causes the player to jump. If -that is not the case, we simply bumped into something, and the speed -is reset to zero. - -The actual `act` method looks like this: - -// include_code - -[source,javascript] ----- -Player.prototype.act = function(step, level, keys) { - this.moveX(step, level, keys); - this.moveY(step, level, keys); - - var otherActor = level.actorAt(this); - if (otherActor) - level.playerTouched(otherActor.type, otherActor); - - // Losing animation - if (level.status == "lost") { - this.pos.y += step; - this.size.y -= step; - } -}; ----- - -(((player)))After moving, the method checks for other actors that the -player is colliding with and again calls `playerTouched` when it -finds one. 
This time, it passes the actor object as the second argument -because if the other actor is a ((coin)), `playerTouched` needs to -know _which_ coin is being collected. - -(((animation)))Finally, when the player dies (touches lava), we set up -a little animation that causes them to “shrink” or “sink” down by -reducing the height of the player object. - -(((collision detection)))And here is the method that handles -collisions between the player and other objects: - -// include_code - -[source,javascript] ----- -Level.prototype.playerTouched = function(type, actor) { - if (type == "lava" && this.status == null) { - this.status = "lost"; - this.finishDelay = 1; - } else if (type == "coin") { - this.actors = this.actors.filter(function(other) { - return other != actor; - }); - if (!this.actors.some(function(actor) { - return actor.type == "coin"; - })) { - this.status = "won"; - this.finishDelay = 1; - } - } -}; ----- - -When ((lava)) is touched, the game's status is set to `"lost"`. When a -coin is touched, that ((coin)) is removed from the array of actors, -and if it was the last one, the game's status is set to `"won"`. - -This gives us a level that can actually be animated. All that is -missing now is the code that _drives_ the animation. - -== Tracking keys == - -(((keyboard)))For a ((game)) like this, we do not want keys to take -effect once per keypress. Rather, we want their effect (moving the player -figure) to continue happening as long as they are pressed. - -(((preventDefault method)))We need to set up a key handler that stores -the current state of the left, right, and up arrow keys. We will also want -to call `preventDefault` for those keys so that they don't end up -((scrolling)) the page. - -(((trackKeys function)))(((key code)))(((event -handling)))(((addEventListener method)))The following function, when given -an object with key codes as property names and key names as values, -will return an object that tracks the current position of those keys. 
-It registers event handlers for `"keydown"` and `"keyup"` events and, -when the key code in the event is present in the set of codes that it -is tracking, updates the object. - -// include_code - -[source,javascript] ----- -var arrowCodes = {37: "left", 38: "up", 39: "right"}; - -function trackKeys(codes) { - var pressed = Object.create(null); - function handler(event) { - if (codes.hasOwnProperty(event.keyCode)) { - var down = event.type == "keydown"; - pressed[codes[event.keyCode]] = down; - event.preventDefault(); - } - } - addEventListener("keydown", handler); - addEventListener("keyup", handler); - return pressed; -} ----- - -(((keydown event)))(((keyup event)))Note how the same handler function -is used for both event types. It looks at the event object's `type` -property to determine whether the key state should be updated to true -(`"keydown"`) or false (`"keyup"`). - -[[runAnimation]] -== Running the game == - -(((requestAnimationFrame function)))(((animation)))The -`requestAnimationFrame` function, which we saw in -link:13_dom.html#animationFrame[Chapter 13], provides a good way to -animate a game. But its interface is quite primitive—using it requires -us to track the time at which our function was called the last time -around and call `requestAnimationFrame` again after every frame. - -(((runAnimation function)))(((callback function)))(((function,as -value)))(((function,higher-order)))Let's define a helper function that -wraps those boring parts in a convenient interface and allows us to -simply call `runAnimation`, giving it a function that expects a time -difference as an argument and draws a single frame. When the frame -function returns the value `false`, the animation stops. 
- -// include_code - -[source,javascript] ----- -function runAnimation(frameFunc) { - var lastTime = null; - function frame(time) { - var stop = false; - if (lastTime != null) { - var timeStep = Math.min(time - lastTime, 100) / 1000; - stop = frameFunc(timeStep) === false; - } - lastTime = time; - if (!stop) - requestAnimationFrame(frame); - } - requestAnimationFrame(frame); -} ----- - -(((time)))(((discretization)))I have set a maximum frame step of 100 -milliseconds (one-tenth of a second). When the browser tab or window -with our page is hidden, `requestAnimationFrame` calls will be -suspended until the tab or window is shown again. In this case, the difference -between `lastTime` and `time` will be the entire time in which the -page was hidden. Advancing the game by that much in a single step will -look silly and might be a lot of work (remember the time-splitting in -the link:15_game.html#actors[`animate` method]). - -The function also converts the time steps to seconds, which are an -easier quantity to think about than milliseconds. - -(((callback function)))(((runLevel function)))The `runLevel` function -takes a `Level` object, a constructor for a ((display)), and, -optionally, a function. It displays the level (in `document.body`) and -lets the user play through it. When the level is finished (lost or -won), `runLevel` clears the display, stops the ((animation)), and, if an -`andThen` function was given, calls that function with the level's status. - -// include_code - -[source,javascript] ----- -var arrows = trackKeys(arrowCodes); - -function runLevel(level, Display, andThen) { - var display = new Display(document.body, level); - runAnimation(function(step) { - level.animate(step, arrows); - display.drawFrame(step); - if (level.isFinished()) { - display.clear(); - if (andThen) - andThen(level.status); - return false; - } - }); -} ----- - -(((runGame function)))A game is a sequence of ((level))s. 
Whenever the -((player)) dies, the current level is restarted. When a level is -completed, we move on to the next level. This can be expressed by the -following function, which takes an array of level plans (arrays of -strings) and a ((display)) constructor: - -// include_code - -[source,javascript] ----- -function runGame(plans, Display) { - function startLevel(n) { - runLevel(new Level(plans[n]), Display, function(status) { - if (status == "lost") - startLevel(n); - else if (n < plans.length - 1) - startLevel(n + 1); - else - console.log("You win!"); - }); - } - startLevel(0); -} ----- - -(((function,higher-order)))(((function,as value)))These functions show -a peculiar style of programming. Both `runAnimation` and `runLevel` -are higher-order functions but are not in the style we saw in -link:05_higher_order.html#higher_order[Chapter 5]. The function -argument is used to arrange things to happen at some time in the -future, and neither of the functions returns anything useful. Their -task is, in a way, to schedule actions. Wrapping these actions in -functions gives us a way to store them as a value so that they can be -called at the right moment. - -(((asynchronous programming)))(((event handling)))This programming -style is usually called _asynchronous_ programming. Event handling is -also an instance of this style, and we will see much more of it when working -with tasks that can take an arbitrary amount of ((time)), such as -((network)) requests in link:17_http.html#http[Chapter 17] and input -and output in general in link:20_node.html#node[Chapter 20]. - -(((game)))(((GAME_LEVELS data set)))There is a set of -((level)) plans available in the `GAME_LEVELS` variable (!book (downloadable from -http://eloquentjavascript.net/code#15[_eloquentjavascript.net/code#15_])!). 
-This page feeds them to `runGame`, starting an actual game: - -// start_code - -[sandbox="null"] -[focus="yes"] -[source,text/html] ----- - - - - - ----- - -ifdef::interactive_target[] - -See if you can beat those. I had quite a lot of fun building them. - -endif::interactive_target[] - -== Exercises == - -=== Game over === - -(((lives (exercise))))(((game)))It's traditional for ((platform game))s -to have the player start with a limited number of _lives_ and -subtract one life each time they die. When the player is out of lives, the game -restarts from the beginning. - -(((runGame function)))Adjust `runGame` to implement lives. Have the -player start with three. - -ifdef::interactive_target[] - -// test: no - -[focus="yes"] -[source,text/html] ----- - - - - - ----- - -endif::interactive_target[] - -!!hint!! - -(((lives (exercise))))(((runGame function)))The most obvious solution -would be to make `lives` a variable that lives in `runGame` and is -thus visible to the `startLevel` ((closure)). - -Another approach, which fits nicely with the spirit of the rest of the -function, would be to add a second ((parameter)) to `startLevel` that -gives the number of lives. When the whole ((state)) of a system is stored -in the arguments to a ((function)), calling that function provides an -elegant way to transition to a new state. - -In any case, when a ((level)) is lost, there should now be two -possible state transitions. If that was the last life, we go back to -level zero with the starting amount of lives. If not, we repeat the -current level with one less life remaining. - -!!hint!! - -=== Pausing the game === - -(((pausing (exercise))))(((escape key)))(((keyboard)))Make it possible -to pause (suspend) and unpause the game by pressing the Esc key. - -(((runLevel function)))(((event handling)))This can be done by -changing the `runLevel` function to use another keyboard event -handler and interrupting or resuming the animation whenever the -Esc key is hit. 
- -(((runAnimation function)))The `runAnimation` interface may not look -like it is suitable for this at first glance, but it is, if you -rearrange the way `runLevel` calls it. - -(((variable,global)))(((trackKeys function)))When you have that -working, there is something else you could try. The way we have been -registering keyboard event handlers is somewhat problematic. The -`arrows` object is currently a global variable, and its event handlers -are kept around even when no game is running. You could say they _((leak))_ out of -our system. Extend `trackKeys` to provide a way to -unregister its handlers, and then change `runLevel` to register its -handlers when it starts and unregister them again when it is -finished. - -ifdef::interactive_target[] - -// test: no - -[focus="yes"] -[source,text/html] ----- - - - - - ----- - -endif::interactive_target[] - -!!hint!! - -(((pausing (exercise))))An ((animation)) can be interrupted by -returning `false` from the function given to `runAnimation`. It can be -continued by calling `runAnimation` again. - -(((closure)))To communicate that the animation should be -interrupted to the function passed to `runAnimation` so that it can -return `false`, you can use a variable that both the event handler and -that function have access to. - -(((event handling)))(((removeEventListener method)))(((function,as -value)))When finding a way to unregister the handlers registered by -`trackKeys`, remember that the _exact_ same function value that was -passed to `addEventListener` must be passed to `removeEventListener` -to successfully remove a handler. Thus, the `handler` function value -created in `trackKeys` must be available to the code that unregisters -the handlers. - -You can add a property to the object returned by `trackKeys`, -containing either that function value or a method that handles the -unregistering directly. - -!!hint!! 
diff --git a/16_canvas.txt b/16_canvas.txt deleted file mode 100644 index 40cc760be..000000000 --- a/16_canvas.txt +++ /dev/null @@ -1,1499 +0,0 @@ -:chap_num: 16 -:prev_link: 15_game -:next_link: 17_http -:load_files: ["code/chapter/15_game.js", "code/game_levels.js", "code/chapter/16_canvas.js"] -:zip: html include=["img/player.png", "img/sprites.png"] - -= Drawing on Canvas = - -[chapterquote="true"] -[quote,M.C. Escher,cited by Bruno Ernst in The Magic Mirror of M.C. Escher] -____ -Drawing is deception. -____ - -(((Escher+++,+++ M.C.)))(((CSS)))(((transform (CSS))))Browsers give us -several ways to display ((graphics)). The simplest way is to use styles to -position and color regular ((DOM)) elements. This can -get you quite far, as the game in the link:15_game.html#game[previous chapter] -showed. By adding partially transparent background ((image))s to the -nodes, we can make them look exactly the way we want. It is even -possible to rotate or skew nodes by using the `transform` style. - -But we'd be using the DOM for something that it wasn't originally -designed for. Some tasks, such as drawing a ((line)) between -arbitrary points, are extremely awkward to do with regular -((HTML)) elements. - -(((SVG)))(((img (HTML tag))))There are two alternatives. The first is DOM-based -but utilizes _Scalable Vector Graphics (SVG)_, rather than HTML -elements. Think of SVG as a dialect for describing -((document))s that focuses on ((shape))s rather than text. You can embed an SVG -document in an HTML document, or you can include it -through an `` tag. - -(((clearing)))The second alternative is called a _((canvas))_. A -canvas is a single ((DOM)) element that encapsulates a ((picture)). It -provides a programming ((interface)) for drawing ((shape))s onto the -space taken up by the node. The main difference between a canvas and -an SVG picture is that in SVG the original description of the shapes -is preserved so that they can be moved or resized at any time. 
-A canvas, on the other hand, converts the shapes to ((pixel))s (colored -dots on a raster) as soon as they are drawn and does not remember -what these pixels represent. The only way to move a shape on a canvas -is to clear the canvas (or the part of the canvas around the shape) and redraw it -with the shape in a new position. - -== SVG == - -This book will not go into ((SVG)) in detail, but I will briefly - explain how it works. At the -link:16_canvas.html#graphics_tradeoffs[end of the chapter], I'll come -back to the trade-offs that you must consider when deciding which -((drawing)) mechanism is appropriate for a given application. - -This is an HTML document with a simple SVG ((picture)) in it: - -[sandbox="svg"] -[source,text/html] ----- -

    Normal HTML here.

    - - - - ----- - -(((circle (SVG tag))))(((rect (SVG tag))))(((XML namespace)))(((XML)))(((xmlns -attribute)))The `xmlns` attribute changes an element (and its -children) to a different _XML namespace_. This namespace, identified -by a ((URL)), specifies the dialect that we are currently speaking. -The `<circle>` and `<rect>` tags, which do not exist in HTML, do have -a meaning in SVG—they draw shapes using the style and position -specified by their attributes. - -ifdef::book_target[] - -The document is displayed like this: - -image::img/svg-demo.png[alt="An embedded SVG image",width="4.5cm"] - -endif::book_target[] - -These tags create ((DOM)) elements, just like ((HTML)) tags. For -example, this changes the `<circle>` element to be ((color))ed cyan -instead: - -[sandbox="svg"] -[source,javascript] ----- -var circle = document.querySelector("circle"); -circle.setAttribute("fill", "cyan"); ----- - -== The canvas element == - -(((canvas,size)))(((canvas (HTML tag))))Canvas ((graphics)) can be drawn -onto a `<canvas>` element. You can give such an element `width` and -`height` attributes to determine its size in ((pixel))s. - -A new canvas is empty, meaning it is entirely ((transparent)) and -thus shows up simply as empty space in the document. - -(((2d (canvas context))))(((webgl (canvas -context))))(((OpenGL)))(((canvas,context)))(((dimensions)))The `<canvas>` -tag is intended to support different styles of ((drawing)). To get -access to an actual drawing ((interface)), we first need to create a -_((context))_, which is an object whose methods provide the drawing -interface. There are currently two widely supported drawing styles: -`"2d"` for two-dimensional graphics and `"webgl"` for -three-dimensional graphics through the OpenGL interface. - -(((rendering)))(((graphics)))(((efficiency)))This book won't discuss -WebGL. We stick to two dimensions. But if you are interested in -three-dimensional graphics, I do encourage you to look into WebGL.
It -provides a very direct interface to modern graphics hardware and thus -allows you to render even complicated scenes efficiently, using -JavaScript. - -(((getContext method)))(((canvas,context)))A ((context)) is created -through the `getContext` method on the `<canvas>` element. - -[source,text/html] ----- -

    Before canvas.

    - -

    After canvas.

    - ----- - -After creating the context object, the example draws a red -((rectangle)) 100 ((pixel))s wide and 50 pixels high, with its top-left -corner at coordinates (10,10). - -ifdef::book_target[] - -image::img/canvas_fill.png[alt="A canvas with a rectangle",width="2.5cm"] - -endif::book_target[] - -(((SVG)))(((coordinates)))Just like in ((HTML)) (and SVG), the -coordinate system that the canvas uses puts (0,0) at the top-left -corner, and the positive y-((axis)) goes down from there. So (10,10) -is 10 pixels below and to the right of the top-left corner. - -[[fill_stroke]] -== Filling and stroking == - -(((filling)))(((stroking)))(((drawing)))(((SVG)))In the ((canvas)) interface, -a shape can be _filled_, meaning its area is given a certain color or pattern, -or it can be _stroked_, which means a ((line)) is drawn along its edge. The -same terminology is used by SVG. - -(((fillRect method)))(((strokeRect method)))The `fillRect` method fills -a ((rectangle)). It takes first the x- and y-((coordinates)) of the -rectangle's top-left corner, then its width, and then its height. A -similar method, `strokeRect`, draws the ((outline)) of a rectangle. - -(((property)))(((state)))Neither method takes any further parameters. -The color of the fill, thickness of the stroke, and so on are not -determined by an argument to the method (as you might justly expect) -but rather by properties of the context object. - -(((filling)))(((fillStyle property)))Setting `fillStyle` changes the way shapes are -filled. It can be set to a string that specifies a ((color)), and any -color understood by ((CSS)) can also be used here. - -(((stroking)))(((line width)))(((strokeStyle property)))(((lineWidth -property)))(((canvas)))The `strokeStyle` property works similarly but -determines the color used for a stroked line. The width of that line -is determined by the `lineWidth` property, which may contain any -positive number. 
- -[source,text/html] ----- - - ----- - -ifdef::book_target[] - -This code draws two blue squares, using a thicker line for the second -one. - -image::img/canvas_stroke.png[alt="Two stroked squares",width="5cm"] - -endif::book_target[] - -(((default value)))(((canvas,size)))When no `width` or `height` -attribute is specified, as in the previous example, a canvas element -gets a default width of 300 pixels and height of 150 pixels. - -== Paths == - -(((path,canvas)))(((interface,design)))(((canvas,path)))A path is a -sequence of ((line))s. The 2D canvas interface takes a peculiar -approach to describing such a path. It is done entirely through -((side effect))s. Paths are not values that can be stored and -passed around. Instead, if you want to do something with a path, you -make a sequence of method calls to describe its shape. - -[source,text/html] ----- - - ----- - -(((canvas)))(((stroke method)))(((lineTo method)))(((moveTo -method)))(((shape)))This example creates a path with a number of -horizontal ((line)) segments and then strokes it using the `stroke` -method. Each segment created with `lineTo` starts at the path's -_current_ position. That position is usually the end of the last segment, -unless `moveTo` was called. In that case, the next segment would start -at the position passed to `moveTo`. - -ifdef::book_target[] - -The path described by the previous program looks like this: - -image::img/canvas_path.png[alt="Stroking a number of lines",width="2.1cm"] - -endif::book_target[] - -(((path,canvas)))(((filling)))(((path,closing)))(((fill method)))When -filling a path (using the `fill` method), each ((shape)) is filled -separately. A path can contain multiple shapes—each `moveTo` motion -starts a new one. But the path needs to be _closed_ (meaning its start and -end are in the same position) before it can be filled. If the path is not -already closed, a line is added from its end to its -start, and the shape enclosed by the completed path is filled. 
- -[source,text/html] ----- - - ----- - -This example draws a filled triangle. Note that only two of the triangle's -sides are explicitly drawn. The third, from the bottom-right corner -back to the top, is implied and won't be there when you stroke the -path. - -ifdef::book_target[] - -image::img/canvas_triangle.png[alt="Filling a path",width="2.2cm"] - -endif::book_target[] - -(((stroke method)))(((closePath -method)))(((path,closing)))(((canvas)))You could also use the `closePath` method -to explicitly close a path by adding an actual ((line)) segment back to -the path's start. This segment _is_ drawn when stroking the path. - -== Curves == - -(((path,canvas)))(((canvas)))(((drawing)))A path may also contain ((curve))d -((line))s. These are, unfortunately, a bit more involved to draw than -straight lines. - -(((quadraticCurveTo method)))The `quadraticCurveTo` method draws a -curve to a given point. To determine the curvature of the line, the method is -given a ((control point)) as well as a destination point. -Imagine this control point as _attracting_ the line, giving the line its -curve. The line won't go through the control point. Rather, the -direction of the line at its start and end points will be such that it -aligns with the line from there to the control point. The following -example illustrates this: - -[source,text/html] ----- - - ----- - -ifdef::book_target[] - -It produces a path that looks like this: - -image::img/canvas_quadraticcurve.png[alt="A quadratic curve",width="2.3cm"] - -endif::book_target[] - -(((stroke method)))We draw a ((quadratic curve)) from the left to the -right, with (60,10) as control point, and then draw two ((line)) -segments going through that control point and back to the start of -the line. The result somewhat resembles a _((Star Trek))_ insignia. You -can see the effect of the control point: the lines leaving the lower -corners start off in the direction of the control point and then -((curve)) toward their target. 
- -(((canvas)))(((bezierCurveTo method))) The `bezierCurveTo` method draws a -similar kind of curve. Instead of a single ((control point)), this one -has two—one for each of the ((line))'s endpoints. Here is a similar sketch to -illustrate the behavior of such a curve: - -[source,text/html] ----- - - ----- - -The two control points specify the direction at both ends of the -curve. The further they are away from their corresponding point, the -more the curve will “bulge” in that direction. - -ifdef::book_target[] - -image::img/canvas_beziercurve.png[alt="A bezier curve",width="2.2cm"] - -endif::book_target[] - -(((trial and error)))Such ((curve))s can be hard to work with—it's -not always clear how to find the ((control point))s that provide the -((shape)) you are looking for. Sometimes you can compute -them, and sometimes you'll just have to find a suitable value by trial -and error. - -(((rounding)))(((canvas)))(((arcTo method)))(((arc)))__Arcs__—fragments of a -((circle))—are easier to reason about. The `arcTo` method -takes no less than five arguments. The first four arguments act -somewhat like the arguments to ++quadraticCurveTo++. The first pair -provides a sort of ((control point)), and the second pair gives the -line's destination. The fifth argument provides the ((radius)) of the -arc. The method will conceptually project a corner—a line going to the -control point and then to the destination point—and round the corner's point so -that it forms part of a circle with the given radius. The `arcTo` method then draws -the rounded part, as well as a line from the starting position to the -start of the rounded part. - -[source,text/html] ----- - - ----- - -ifdef::book_target[] - -This produces two rounded corners with different radii. 
- -image::img/canvas_arc.png[alt="Two arcs with different radii",width="2.3cm"] - -endif::book_target[] - -(((canvas)))(((arcTo method)))(((lineTo method)))The `arcTo` method -won't draw the line from the end of the rounded part to the goal -position, though the word _to_ in its name would suggest it does. You -can follow up with a call to `lineTo` with the same goal coordinates -to add that part of the line. - -(((arc method)))(((arc)))To draw a ((circle)), you could use four -calls to `arcTo` (each turning 90 degrees). But the `arc` method -provides a simpler way. It takes a pair of ((coordinates)) for the -arc's center, a radius, and then a start and end angle. - -(((pi)))(((Math.PI constant)))Those last two parameters make it -possible to draw only part of a circle. The ((angle))s are measured in -((radian))s, not ((degree))s. This means a full ((circle)) has an -angle of 2π, or `2 * Math.PI`, which is about 6.28. The angle starts counting at -the point to the right of the circle's center and goes clockwise from -there. You can use a start of 0 and an end bigger than 2π (say, 7) -to draw a full circle. - -[source,text/html] ----- - - ----- - -(((moveTo method)))(((arc method)))(((path, canvas)))The resulting picture -contains a ((line)) from the right of the full circle (first call to -`arc`) to the right of the quarter-((circle)) (second call). Like other -path-drawing methods, a line drawn with `arc` is connected to the -previous path segment by default. You'd have to call `moveTo` or -start a new path if you want to avoid this. - -ifdef::book_target[] - -image::img/canvas_circle.png[alt="Drawing a circle",width="4.9cm"] - -endif::book_target[] - -[[pie_chart]] -== Drawing a pie chart == - -(((pie chart example)))Imagine you've just taken a ((job)) at -EconomiCorp, Inc., and your first assignment is to draw a pie chart of -their customer satisfaction ((survey)) results. - -The `results` variable contains an array of objects that represent the -survey responses. 
- -// include_code - -[sandbox="pie"] -[source,javascript] ----- -var results = [ - {name: "Satisfied", count: 1043, color: "lightblue"}, - {name: "Neutral", count: 563, color: "lightgreen"}, - {name: "Unsatisfied", count: 510, color: "pink"}, - {name: "No comment", count: 175, color: "silver"} -]; ----- - -(((pie chart example)))To draw a pie chart, we draw a number of pie -slices, each made up of an ((arc)) and a pair of ((line))s to the center -of that arc. We can compute the ((angle)) taken up by each arc by dividing -a full circle (2π) by the total number of responses and then -multiplying that number (the angle per response) by the number of -people who picked a given choice. - -[sandbox="pie"] -[source,text/html] ----- - - ----- - -ifdef::book_target[] - -This draws the following chart: - -image::img/canvas_pie_chart.png[alt="A pie chart",width="5cm"] - -endif::book_target[] - -But a chart that doesn't tell us what it means isn't very helpful. We -need a way to draw text to the ((canvas)). - -== Text == - -(((stroking)))(((filling)))(((fillStyle property)))(((fillText -method)))(((strokeText method)))A 2D canvas drawing context provides -the methods `fillText` and `strokeText`. The latter can be useful for -outlining letters, but usually `fillText` is what you need. It will -fill the given ((text)) with the current `fillStyle`. - -[source,text/html] ----- - - ----- - -You can specify the size, style, and ((font)) of the text with the -`font` property. This example just gives a font size and family name. -You can add `italic` or `bold` to the start of the string to select a -style. - -(((fillText method)))(((strokeText method)))(((textAlign -property)))(((textBaseline property)))The last two arguments to -`fillText` (and `strokeText`) provide the position at which the font -is drawn. 
By default, they indicate the position of the start of the -text's alphabetic baseline, which is the line that letters “stand” on, not -counting hanging parts in letters like _j_ or _p_. You can change the horizontal -position by setting the `textAlign` property to `"end"` -or `"center"` and the vertical position by setting `textBaseline` to -`"top"`, `"middle"`, or `"bottom"`. - -(((pie chart example)))We will come back to our pie chart, and the -problem of ((label))ing the slices, in the -link:16_canvas.html#exercise_pie_chart[exercises] at the end of the -chapter. - -== Images == - -(((vector graphics)))(((bitmap graphics)))In computer ((graphics)), a -distinction is often made between _vector_ graphics and _bitmap_ -graphics. The first is what we have been doing so far in this -chapter—specifying a picture by giving a logical description of -((shape))s. Bitmap graphics, on the other hand, don't specify actual -shapes but rather work with ((pixel)) data (rasters of colored dots). - -(((load event)))(((event handling)))(((img (HTML tag))))(((drawImage -method)))The `drawImage` method allows us to draw ((pixel)) data onto -a ((canvas)). This pixel data can originate from an `<img>` element or -from another canvas, and neither has to be visible in the actual -document. The following example creates a detached `<img>` element and -loads an image file into it. But it cannot immediately start drawing -from this picture because the browser may not have fetched it yet. To -deal with this, we register a `"load"` event handler and do the -drawing after the image has loaded. - -[source,text/html] ----- - - ----- - -(((drawImage method)))(((scaling)))By default, `drawImage` will draw -the image at its original size. You can also give it two additional -arguments to dictate a different width and height. - -When `drawImage` is given _nine_ arguments, it can be used to draw -only a fragment of an image. 
The second through fifth arguments indicate the -rectangle (x, y, width, and height) in the source image that should be -copied, and the sixth to ninth arguments give the rectangle (on the -canvas) into which it should be copied. - -(((player character)))(((pixel art)))This can be used to pack multiple -_((sprite))s_ (image elements) into a single image file and then -draw only the part you need. For example, we have this picture containing a -game character in multiple ((pose))s: - -image::img/player_big.png[alt="Various poses of a game character",width="6cm"] - -By alternating which pose we draw, we can show an ((animation)) that -looks like a walking character. - -(((fillRect method)))(((clearRect method)))(((clearing)))To animate -the ((picture)) on a ((canvas)), the `clearRect` method is useful. It -resembles `fillRect`, but instead of coloring the rectangle, it makes -it ((transparent)), removing the previously drawn pixels. - -(((setInterval function)))(((img (HTML tag))))We know that each -_((sprite))_, each subpicture, is 24 ((pixel))s wide and 30 pixels -high. The following code loads the image and then sets up an interval -(repeated timer) to draw the next _((frame))_: - -[source,text/html] ----- - - ----- - -(((remainder operator)))(((% operator)))The `cycle` variable tracks -our position in the ((animation)). Each ((frame)), it is incremented -and then clipped back to the 0 to 7 range by using the remainder -operator. This variable is then used to compute the x-coordinate that -the sprite for the current pose has in the picture. - -== Transformation == - -indexsee:[flipping,mirroring] -(((transformation)))(((mirroring)))But what if we want our character to -walk to the left instead of to the right? We could add another set of -sprites, of course. But we can also instruct the ((canvas)) to draw -the picture the other way round. - -(((scale method)))(((scaling)))Calling the `scale` method will cause -anything drawn after it to be scaled. 
This method takes two parameters, one to -set a horizontal scale and one to set a vertical scale. - -[source,text/html] ----- - - ----- - -ifdef::book_target[] - -Due to the call to `scale`, the circle is drawn three times as wide -and half as high. - -image::img/canvas_scale.png[alt="A scaled circle",width="6.6cm"] - -endif::book_target[] - -(((mirroring)))Scaling will cause everything about the drawn image, including the -((line width)), to be stretched out or squeezed together as specified. -Scaling by a negative amount will flip the picture around. The -flipping happens around point (0,0), which means it will also -flip the direction of the coordinate system. When a horizontal scaling -of -1 is applied, a shape drawn at x position 100 will end up at what -used to be position -100. - -(((drawImage method)))So to turn a picture around, we can't simply -add `cx.scale(-1, 1)` before the call to `drawImage` since that would -move our picture outside of the ((canvas)), where it won't be visible. -You could adjust the ((coordinates)) given to -`drawImage` to compensate for this by drawing the image at x position -50 -instead of 0. Another solution, which doesn't require the code that does -the drawing to know about the scale change, is to adjust the ((axis)) -around which the scaling happens. - -(((rotate method)))(((translate method)))(((transformation)))There are several -other methods besides `scale` that influence the coordinate system for a ((canvas)). -You can rotate subsequently drawn shapes with the `rotate` method and move them with the -`translate` method. The interesting—and confusing—thing is that these -transformations _stack_, meaning that each one happens relative to the -previous transformations. - -(((rotate method)))(((translate method)))So if we translate by -10 horizontal pixels twice, everything will be drawn 20 pixels to the -right. 
If we first move the center of the coordinate system to (50,50) -and then rotate by 20 ((degree))s (0.1π in ((radian))s), that rotation -will happen _around_ point (50,50). - -image::img/transform.svg[alt="Stacking transformations",width="9cm"] - -(((coordinates)))But if we _first_ rotate by 20 degrees and _then_ -translate by (50,50), the translation will happen in the rotated -coordinate system and thus produce a different orientation. The order -in which transformations are applied matters. - -(((axis)))(((mirroring)))To flip a picture around the vertical line at a given x -position, we can do the following: - -// include_code - -[source,javascript] ----- -function flipHorizontally(context, around) { - context.translate(around, 0); - context.scale(-1, 1); - context.translate(-around, 0); -} ----- - -(((flipHorizontally method)))We move the y-((axis)) to where we -want our ((mirror)) to be, apply the mirroring, and finally move -the y-axis back to its proper place in the mirrored universe. The -following picture explains why this works: - -image::img/mirror.svg[alt="Mirroring around a vertical line",width="8cm"] - -(((translate method)))(((scale -method)))(((transformation)))(((canvas)))This shows the coordinate -systems before and after mirroring across the central line. If we draw a -triangle at a positive x position, it would, by default, be in the -place where triangle 1 is. A call to `flipHorizontally` first does a -translation to the right, which gets us to triangle 2. It then scales, -flipping the triangle back to position 3. This is not where it should -be, if it were mirrored in the given line. The second `translate` call -fixes this—it “cancels” the initial translation and makes triangle 4 -appear exactly where it should. - -We can now draw a mirrored character at position (100,0) by flipping -the world around the character's vertical center. 
- -[source,text/html] ----- - - ----- - -== Storing and clearing transformations == - -(((side effect)))(((canvas)))(((transformation)))Transformations stick -around. Everything else we draw after ((drawing)) that mirrored -character would also be mirrored. That might be a problem. - -It is possible to save the current transformation, do some drawing and -transforming, and then restore the old transformation. This is usually -the proper thing to do for a function that needs to temporarily -transform the coordinate system. First, we save whatever transformation the code that -called the function was using. Then, the function does its thing (on top of the -existing transformation), possibly adding more transformations. And finally, we -revert to the transformation that we started with. - -(((save method)))(((restore method)))The `save` and `restore` methods -on the 2D ((canvas)) context perform this kind of ((transformation)) -management. They conceptually keep a stack of transformation -((state))s. When you call `save`, the current state is pushed onto the -stack, and when you call `restore`, the state on top of the stack is -taken off and used as the context's current transformation. - -(((branching recursion)))(((fractal -example)))(((recursion)))The `branch` function in the following example -illustrates what you can do with a function that changes the -transformation and then calls another function (in this case itself), -which continues drawing with the given transformation. - -This function draws a treelike shape by drawing a line, -moving the center of the coordinate system to the end of the line, and calling -itself twice—first rotated to the left and then rotated to the -right. Every call reduces the length of the branch drawn, and the -recursion stops when the length drops below 8. - -[source,text/html] ----- - - ----- - -ifdef::book_target[] - -The result is a simple fractal. 
- -image::img/canvas_tree.png[alt="A recursive picture",width="5cm"] - -endif::book_target[] - -(((save method)))(((restore method)))(((canvas)))(((rotate method)))If -the calls to `save` and `restore` were not there, the second recursive -call to `branch` would end up with the position and rotation created -by the first call. It wouldn't be connected to the current branch but -rather to the innermost, rightmost branch drawn by the first call. The -resulting shape might also be interesting, but it is definitely not a -tree. - -[[canvasdisplay]] -== Back to the game == - -(((drawImage method)))We now know enough about ((canvas)) drawing to -start working on a ((canvas))-based ((display)) system for the -((game)) from the link:15_game.html#game[previous chapter]. The new -display will no longer be showing just colored boxes. Instead, we'll -use `drawImage` to draw pictures that represent the game's elements. - -(((CanvasDisplay type)))(((DOMDisplay type)))We will define an object -type `CanvasDisplay`, supporting the same ((interface)) as -`DOMDisplay` from link:15_game.html#domdisplay[Chapter 15], namely, the -methods `drawFrame` and `clear`. - -(((state)))This object keeps a little more information than -`DOMDisplay`. Rather than using the scroll position of its DOM -element, it tracks its own ((viewport)), which tells us what part of -the level we are currently looking at. It also tracks ((time)) and -uses that to decide which ((animation)) ((frame)) to use. And finally, -it keeps a `flipPlayer` property so that even when the player is -standing still, it keeps facing the direction it last moved in. 
- -// include_code - -[sandbox="game"] -[source,javascript] ----- -function CanvasDisplay(parent, level) { - this.canvas = document.createElement("canvas"); - this.canvas.width = Math.min(600, level.width * scale); - this.canvas.height = Math.min(450, level.height * scale); - parent.appendChild(this.canvas); - this.cx = this.canvas.getContext("2d"); - - this.level = level; - this.animationTime = 0; - this.flipPlayer = false; - - this.viewport = { - left: 0, - top: 0, - width: this.canvas.width / scale, - height: this.canvas.height / scale - }; - - this.drawFrame(0); -} - -CanvasDisplay.prototype.clear = function() { - this.canvas.parentNode.removeChild(this.canvas); -}; ----- - -(((CanvasDisplay type)))The `animationTime` counter is the reason we -passed the step size to `drawFrame` in -link:15_game.html#domdisplay[Chapter 15], even though `DOMDisplay` -does not use it. Our new `drawFrame` function uses the counter to track time -so that it can switch between ((animation)) ((frame))s based on the -current time. - -// include_code - -[sandbox="game"] -[source,javascript] ----- -CanvasDisplay.prototype.drawFrame = function(step) { - this.animationTime += step; - - this.updateViewport(); - this.clearDisplay(); - this.drawBackground(); - this.drawActors(); -}; ----- - -(((scrolling)))Other than tracking time, the method updates the -((viewport)) for the current player position, fills the whole canvas -with a background color, and draws the ((background)) and ((actor))s -onto that. Note that this is different from the approach in -link:15_game.html#domdisplay[Chapter 15], where we drew the background -once and scrolled the wrapping DOM element to move it. - -(((clearing)))Because shapes on a canvas are just ((pixel))s, after we -draw them, there is no way to move them (or remove them). The only way -to update the canvas display is to clear it and redraw the scene. 
- -(((CanvasDisplay type)))The `updateViewport` method is similar to -`DOMDisplay`'s `scrollPlayerIntoView` method. It checks whether the -player is too close to the edge of the screen and moves the -((viewport)) when this is the case. - -// include_code - -[sandbox="game"] -[source,javascript] ----- -CanvasDisplay.prototype.updateViewport = function() { - var view = this.viewport, margin = view.width / 3; - var player = this.level.player; - var center = player.pos.plus(player.size.times(0.5)); - - if (center.x < view.left + margin) - view.left = Math.max(center.x - margin, 0); - else if (center.x > view.left + view.width - margin) - view.left = Math.min(center.x + margin - view.width, - this.level.width - view.width); - if (center.y < view.top + margin) - view.top = Math.max(center.y - margin, 0); - else if (center.y > view.top + view.height - margin) - view.top = Math.min(center.y + margin - view.height, - this.level.height - view.height); -}; ----- - -(((boundary)))(((Math.max function)))(((Math.min function)))(((clipping)))The calls -to `Math.max` and `Math.min` ensure that the viewport does -not end up showing space outside of the level. `Math.max(x, 0)` -ensures that the resulting number is not less than zero. -`Math.min`, similarly, ensures a value stays below a given bound. - -When ((clearing)) the display, we'll use a slightly different -((color)) depending on whether the game is won (brighter) or lost -(darker). 
- -// include_code - -[sandbox="game"] -[source,javascript] ----- -CanvasDisplay.prototype.clearDisplay = function() { - if (this.level.status == "won") - this.cx.fillStyle = "rgb(68, 191, 255)"; - else if (this.level.status == "lost") - this.cx.fillStyle = "rgb(44, 136, 214)"; - else - this.cx.fillStyle = "rgb(52, 166, 251)"; - this.cx.fillRect(0, 0, - this.canvas.width, this.canvas.height); -}; ----- - -(((Math.floor function)))(((Math.ceil function)))(((rounding)))To draw the -background, we run through the tiles that are visible in the current -viewport, using the same trick used in `obstacleAt` in the -link:15_game.html#viewport[previous chapter]. - -// include_code - -[sandbox="game"] -[source,javascript] ----- -var otherSprites = document.createElement("img"); -otherSprites.src = "img/sprites.png"; - -CanvasDisplay.prototype.drawBackground = function() { - var view = this.viewport; - var xStart = Math.floor(view.left); - var xEnd = Math.ceil(view.left + view.width); - var yStart = Math.floor(view.top); - var yEnd = Math.ceil(view.top + view.height); - - for (var y = yStart; y < yEnd; y++) { - for (var x = xStart; x < xEnd; x++) { - var tile = this.level.grid[y][x]; - if (tile == null) continue; - var screenX = (x - view.left) * scale; - var screenY = (y - view.top) * scale; - var tileX = tile == "lava" ? scale : 0; - this.cx.drawImage(otherSprites, - tileX, 0, scale, scale, - screenX, screenY, scale, scale); - } - } -}; ----- - -(((drawImage method)))(((sprite)))(((tile)))Tiles that are not empty (null) -are drawn with `drawImage`. The `otherSprites` image contains the -pictures used for elements other than the player. It contains, from -left to right, the wall tile, the lava tile, and the sprite for a -coin. - -image::img/sprites_big.png[alt="Sprites for our game",width="1.4cm"] - -(((scaling)))Background tiles are 20 by 20 pixels, since we will use -the same scale that we used in `DOMDisplay`. 
Thus, the offset for lava -tiles is 20 (the value of the `scale` variable), and the offset for -walls is 0. - -(((drawing)))(((load event)))(((drawImage method)))We don't bother -waiting for the sprite image to load. Calling -`drawImage` with an image that hasn't been loaded yet will simply do -nothing. Thus, we might fail to draw the game properly for the first -few ((frame))s, while the image is still loading, but that is not a -serious problem. Since we keep updating the screen, the correct scene -will appear as soon as the loading finishes. - -(((player character)))(((animation)))(((drawing)))The ((walking)) -character shown earlier will be used to represent the player. The -code that draws it needs to pick the right ((sprite)) and direction -based on the player's current motion. The first eight sprites contain a -walking animation. When the player is moving along a floor, we cycle -through them based on the display's `animationTime` property. This is -measured in seconds, and we want to switch frames 12 times per -second, so the ((time)) is multiplied by 12 first. When the player is -standing still, we draw the ninth sprite. During jumps, which are -recognized by the fact that the vertical speed is not zero, we use the -tenth, rightmost sprite. - -(((flipHorizontally function)))(((CanvasDisplay type)))Because the -((sprite))s are slightly wider than the player object—24 instead of 16 -pixels, to allow some space for feet and arms—the method has to adjust -the x-coordinate and width by a given amount (`playerXOverlap`). 
- -// include_code - -[sandbox="game"] -[source,javascript] ----- -var playerSprites = document.createElement("img"); -playerSprites.src = "img/player.png"; -var playerXOverlap = 4; - -CanvasDisplay.prototype.drawPlayer = function(x, y, width, - height) { - var sprite = 8, player = this.level.player; - width += playerXOverlap * 2; - x -= playerXOverlap; - if (player.speed.x != 0) - this.flipPlayer = player.speed.x < 0; - - if (player.speed.y != 0) - sprite = 9; - else if (player.speed.x != 0) - sprite = Math.floor(this.animationTime * 12) % 8; - - this.cx.save(); - if (this.flipPlayer) - flipHorizontally(this.cx, x + width / 2); - - this.cx.drawImage(playerSprites, - sprite * width, 0, width, height, - x, y, width, height); - - this.cx.restore(); -}; ----- - -The `drawPlayer` method is called by `drawActors`, which is responsible for -drawing all the actors in the game. - -// include_code - -[sandbox="game"] -[source,javascript] ----- -CanvasDisplay.prototype.drawActors = function() { - this.level.actors.forEach(function(actor) { - var width = actor.size.x * scale; - var height = actor.size.y * scale; - var x = (actor.pos.x - this.viewport.left) * scale; - var y = (actor.pos.y - this.viewport.top) * scale; - if (actor.type == "player") { - this.drawPlayer(x, y, width, height); - } else { - var tileX = (actor.type == "coin" ? 2 : 1) * scale; - this.cx.drawImage(otherSprites, - tileX, 0, width, height, - x, y, width, height); - } - }, this); -}; ----- - -When ((drawing)) something that is not the ((player)), we look at its -type to find the offset of the correct sprite. The ((lava)) tile is -found at offset 20, and the ((coin)) sprite is found at 40 (two times `scale`). - -(((viewport)))We have to subtract the viewport's position when -computing the actor's position since (0,0) on our ((canvas)) -corresponds to the top left of the viewport, not the top left of the -level. We could also have used `translate` for this. Either way works. 
- -ifdef::interactive_target[] - -(((GAME_LEVELS data set)))(((game,with canvas)))The tiny document -shown next plugs the new display into `runGame`: - -// start_code - -[sandbox="game"] -[focus="yes"] -[source,text/html] ----- - - - ----- - -endif::interactive_target[] - -ifdef::book_target[] - -(((game,screenshot)))That concludes the new ((display)) system. The -resulting game looks something like this: - -image::img/canvas_game.png[alt="The game as shown on canvas",width="8cm"] - -endif::book_target[] - -[[graphics_tradeoffs]] -== Choosing a graphics interface == - -Whenever you need to generate graphics in the browser, you can choose -between plain ((HTML)), ((SVG)), and ((canvas)). There is no single -_best_ approach that works in all situations. Each option has -strengths and weaknesses. - -(((text wrapping)))Plain HTML has the advantage of being simple. It -also integrates well with ((text)). Both SVG and canvas allow you to -draw text, but they won't help you position that text or wrap it -when it takes up more than one line. In an HTML-based picture, it is -easy to include blocks of text. - -(((zooming)))(((SVG)))SVG can be used to produce ((crisp)) ((graphics)) -that look good at any zoom level. It is more difficult to use than -plain HTML but also much more powerful. - -(((DOM)))(((SVG)))(((event handling)))Both SVG and HTML build up a -((data structure)) (the DOM) that represents the picture. This makes -it possible to modify elements after they are drawn. If you need to -repeatedly change a small part of a big ((picture)) in response to -what the user is doing or as part of an ((animation)), doing it in a -canvas can be needlessly expensive. The DOM also allows us to register -mouse event handlers on every element in the picture (even on shapes -drawn with SVG). You can't do that with canvas. - -(((performance)))(((optimization)))But ((canvas))’s ((pixel))-oriented -approach can be an advantage when drawing a huge amount of tiny -elements. 
The fact that it does not build up a data structure but -only repeatedly draws onto the same pixel surface gives canvas a -lower cost per shape. - -(((ray tracer)))There are also effects, such as rendering a scene one -pixel at a time (for example, using a ray tracer) or postprocessing -an image with JavaScript (blurring or distorting it), that can only be -realistically handled by a ((pixel))-based technique. - -In some cases, you may want to combine several of these -techniques. For example, you might draw a ((graph)) with ((SVG)) or -((canvas)) but show ((text))ual information by positioning an -((HTML)) element on top of the picture. - -(((display)))For nondemanding applications, it really doesn't matter -much which interface you choose. The -link:16_canvas.html#canvasdisplay[second display] we built for our -game in this chapter could have been implemented using any of these -three ((graphics)) technologies since it does not need to draw text, -handle mouse interaction, or work with an extraordinarily large amount -of elements. - -== Summary == - -In this chapter, we discussed techniques for drawing graphics in the -browser, focusing on the `<canvas>` element. - -A canvas node represents an area in a document that our program may -draw on. This drawing is done through a drawing context object, -created with the `getContext` method. - -The 2D drawing interface allows us to fill and stroke various shapes. -The context's `fillStyle` property determines how shapes are filled. The -`strokeStyle` and `lineWidth` properties control the way lines are drawn. - -Rectangles and pieces of text can be drawn with a single method call. -The `fillRect` and `strokeRect` methods draw rectangles, and the -`fillText` and `strokeText` methods draw text. To create custom shapes, -we must first build up a path. - -(((stroking)))(((filling)))Calling `beginPath` starts a new path. A -number of other methods add lines and curves to the current path. 
For -example, `lineTo` can add a straight line. When a path is -finished, it can be filled with the `fill` method or stroked with the -`stroke` method. - -Moving pixels from an image or another canvas onto our canvas is done -with the `drawImage` method. By default, this method draws the whole -source image, but by giving it more parameters, you can copy -a specific area of the image. We used this for our game by copying individual -poses of the game character out of an image that contained many -such poses. - -Transformations allow you to draw a shape in multiple orientations. -A 2D drawing context has a current transformation that can be changed -with the `translate`, `scale`, and `rotate` methods. These will affect -all subsequent drawing operations. A transformation state can be saved -with the `save` method and restored with the `restore` method. - -When drawing an animation on a canvas, the `clearRect` method can be -used to clear part of the canvas before redrawing it. - -== Exercises == - -=== Shapes === - -(((shapes (exercise))))Write a program that draws the following -((shape))s on a ((canvas)): - -1. A ((trapezoid)) (a ((rectangle)) that is wider on one side) - -2. (((rotation)))A red ((diamond)) (a rectangle rotated 45 degrees or ¼π radians) - -3. A zigzagging ((line)) - -4. A ((spiral)) made up of 100 straight line segments - -5. A yellow ((star)) - -image::img/exercise_shapes.png[alt="The shapes to draw",width="8cm"] - -When drawing the last two, you may want to refer to the -explanation of `Math.cos` and `Math.sin` in -link:13_dom.html#sin_cos[Chapter 13], which describes how to get -coordinates on a circle using these functions. - -(((readability)))(((hard-coding)))I recommend creating a function for -each shape. Pass the position, and optionally other properties, -such as the size or the number of points, as parameters. The -alternative, which is to hard-code numbers all over your code, tends -to make the code needlessly hard to read and modify. 
- -ifdef::interactive_target[] - -// test: no - -[source,text/html] ----- - - ----- - -endif::interactive_target[] - -!!hint!! - -(((path,canvas)))(((shapes (exercise))))The ((trapezoid)) (1) is easy to draw using -a path. Pick suitable center coordinates and add each of the four -corners around that. - -(((flipHorizontally function)))(((rotation)))The ((diamond)) (2) can -be drawn the easy way, with a path, or the interesting way, with a -`rotate` ((transformation)). To use rotation, you will have to apply a -trick similar to what we did in the `flipHorizontally` function. -Because you want to rotate around the center of your rectangle and -not around the point (0,0), you must first `translate` to there, then -rotate, and then translate back. - -(((remainder operator)))(((% operator)))For the ((zigzag)) (3) it -becomes impractical to write a new call to `lineTo` for each line -segment. Instead, you should use a ((loop)). You can have each -iteration draw either two ((line)) segments (right and then left again) or -one, in which case you must use the evenness (`% 2`) of the loop index -to determine whether to go left or right. - -You'll also need a loop for the ((spiral)) (4). If you draw a series -of points, with each point moving further along a circle around the -spiral's center, you get a circle. If, during the loop, you vary the -radius of the circle on which you are putting the current point and -go around more than once, the result is a spiral. - -(((quadraticCurveTo method)))The ((star)) (5) depicted is built out of -`quadraticCurveTo` lines. You could also draw one with straight lines. -Divide a circle into eight pieces, or a piece for each point you want your -star to have. Draw lines between these points, making them curve -toward the center of the star. With `quadraticCurveTo`, you can use -the center as the control point. - -!!hint!! 
- -[[exercise_pie_chart]] -=== The pie chart === - -(((label)))(((text)))(((pie chart -example)))link:16_canvas.html#pie_chart[Earlier] in the chapter, we -saw an example program that drew a pie chart. Modify this program so -that the name of each category is shown next to the slice that -represents it. Try to find a pleasing-looking way to automatically -position this text, which would work for other data sets as well. You -may assume that categories are no smaller than 5 percent (that is, there won't be -a bunch of tiny ones next to each other). - -You might again need `Math.sin` and `Math.cos`, as described in the -previous exercise. - -ifdef::interactive_target[] - -// test: no - -[source,text/html] ----- - - ----- - -endif::interactive_target[] - -!!hint!! - -(((fillText method)))(((textAlign property)))(((textBaseline -property)))(((pie chart example)))You will need to call `fillText` -and set the context's `textAlign` and `textBaseline` properties in -such a way that the text ends up where you want it. - -A sensible way to position the labels would be to put the text on the -line going from the center of the pie through the middle of the slice. -You don't want to put the text directly against the side of the pie -but rather move the text out to the side of the pie by a given number of pixels. - -The ((angle)) of this line is `currentAngle + 0.5 * sliceAngle`. The -following code finds a position on this line, 120 pixels from the center: - -// test: no - -[source,javascript] ----- -var middleAngle = currentAngle + 0.5 * sliceAngle; -var textX = Math.cos(middleAngle) * 120 + centerX; -var textY = Math.sin(middleAngle) * 120 + centerY; ----- - -For `textBaseline`, the value `"middle"` is probably appropriate when -using this approach. What to use for `textAlign` depends on the side -of the circle we are on. On the left, it should be `"right"`, and on -the right, it should be `"left"` so that the text is positioned away -from the pie. 
- -(((Math.cos function)))If you are not sure how to find out which side -of the circle a given angle is on, look to the explanation of -`Math.cos` in the previous exercise. The cosine of an angle tells us -which x-coordinate it corresponds to, which in turn tells us exactly -which side of the circle we are on. - -!!hint!! - -=== A bouncing ball === - -(((animation)))(((requestAnimationFrame function)))(((bouncing)))Use -the `requestAnimationFrame` technique that we saw in -link:13_dom.html#animationFrame[Chapter 13] and -link:15_game.html#runAnimation[Chapter 15] to draw a ((box)) with a -bouncing ((ball)) in it. The ball moves at a constant -((speed)) and bounces off the box's sides when it hits them. - -ifdef::interactive_target[] - -// test: no - -[source,text/html] ----- - - ----- - -endif::interactive_target[] - -!!hint!! - -(((strokeRect method)))(((animation)))(((arc method)))A ((box)) is -easy to draw with `strokeRect`. Define a variable that holds its size -or define two variables if your box's width and height differ. To create a -round ((ball)), start a path, call ++arc(x, y, radius, 0, 7)++, which creates an arc -going from zero to more than a whole circle, and fill it. - -(((collision detection)))(((Vector type)))To model the ball's position -and ((speed)), you can use the `Vector` type from -link:15_game.html#vector[Chapter 15](!interactive (which is available on this -page)!). Give it a starting speed, preferably one that is not purely -vertical or horizontal, and every ((frame)), multiply that speed with -the amount of time that elapsed. When the ball gets too close to a -vertical wall, invert the x component in its speed. Likewise, invert -the y component when it hits a horizontal wall. - -(((clearRect method)))(((clearing)))After finding the ball's new -position and speed, use `clearRect` to delete the scene and redraw it -using the new position. - -!!hint!! 
- -=== Precomputed mirroring === - -(((optimization)))(((bitmap graphics)))(((mirror)))One unfortunate -thing about ((transformation))s is that they slow down drawing of -bitmaps. For vector graphics, the effect is less serious since -only a few points (for example, the center of a circle) need to be -transformed, after which drawing can happen as normal. For a bitmap -image, the position of each ((pixel)) has to be transformed, and -though it is possible that ((browser))s will get more clever about -this in the ((future)), this currently causes a measurable increase in -the time it takes to draw a bitmap. - -In a game like ours, where we are drawing only a single transformed -sprite, this is a nonissue. But imagine that we need to draw hundreds -of characters or thousands of rotating particles from an explosion. - -Think of a way to allow us to draw an inverted character without -loading additional image files and without having to make transformed -`drawImage` calls every frame. - -!!hint!! - -(((mirror)))(((scaling)))(((drawImage method)))The key to the solution -is the fact that we can use a ((canvas)) element as a source image -when using `drawImage`. It is possible to create an extra `<canvas>` -element, without adding it to the document, and draw our inverted -sprites to it, once. When drawing an actual frame, we just copy the -already inverted sprites to the main canvas. - -(((load event)))Some care would be required because images do not load -instantly. We do the inverted drawing only once, and if we do it -before the image loads, it won't draw anything. A `"load"` handler on -the image can be used to draw the inverted images to the extra canvas. -This canvas can be used as a drawing source immediately (it'll simply -be blank until we draw the character onto it). - -!!hint!! 
diff --git a/17_http.txt b/17_http.txt deleted file mode 100644 index b6e9207c1..000000000 --- a/17_http.txt +++ /dev/null @@ -1,991 +0,0 @@ -:chap_num: 17 -:prev_link: 16_canvas -:next_link: 18_forms -:load_files: ["code/chapter/17_http.js", "code/promise.js"] - -= HTTP = - -[chapterquote="true"] -[quote,Tim Berners-Lee,The World Wide Web: A very short personal history] -____ -The dream behind the Web is of a common information space in which we -communicate by sharing information. Its universality is essential: the -fact that a hypertext link can point to anything, be it personal, -local or global, be it draft or highly polished. -____ - -(((Berners-Lee+++,+++ Tim)))(((World Wide Web)))(((HTTP)))The -_Hypertext Transfer Protocol_, already mentioned in -link:12_browser.html#web[Chapter 12], is the mechanism through which -data is requested and provided on the ((World Wide Web)). This chapter -describes the ((protocol)) in more detail and explains the way ((browser)) -JavaScript has access to it. - -== The protocol == - -(((IP address)))If you type _eloquentjavascript.net/17_http.html_ into -your browser's ((address bar)), the ((browser)) first looks up the -((address)) of the server associated with _eloquentjavascript.net_ -and tries to open a ((TCP)) ((connection)) to it on ((port)) 80, the -default port for ((HTTP)) traffic. If the ((server)) exists and -accepts the connection, the browser sends something like this: - -[source,http] ----- -GET /17_http.html HTTP/1.1 -Host: eloquentjavascript.net -User-Agent: Your browser's name ----- - -Then the server responds, through that same connection. - -[source,http] ----- -HTTP/1.1 200 OK -Content-Length: 65585 -Content-Type: text/html -Last-Modified: Wed, 09 Apr 2014 10:48:09 GMT - - -... the rest of the document ----- - -The browser then takes the part of the ((response)) after the blank -line and displays it as an ((HTML)) document. - -(((HTTP)))The information sent by the client is called the -_((request))_. 
It starts with this line: - -[source,http] ----- -GET /17_http.html HTTP/1.1 ----- - -(((DELETE method)))(((PUT method)))(((GET method)))The first word is -the _((method))_ of the ((request)). `GET` means that we want to _get_ -the specified resource. Other common methods are `DELETE` to delete a -resource, `PUT` to replace it, and `POST` to send information to it. -Note that the ((server)) is not obliged to carry out every request it -gets. If you walk up to a random website and tell it to `DELETE` its -main page, it'll probably refuse. - -(((path,URL)))(((Twitter)))The part after the ((method)) name is the path of the -((resource)) the request applies to. In the simplest case, a resource -is simply a ((file)) on the ((server)), but the protocol doesn't -require it to be. A resource may be anything that can be transferred _as if_ -it is a file. Many servers generate the responses they produce on the -fly. For example, if you open -http://twitter.com/marijnjh[_twitter.com/marijnjh_], the server looks -in its database for a user named _marijnjh_, and if it finds one, it -will generate a profile page for that user. - -After the resource path, the first line of the request mentions -`HTTP/1.1` to indicate the ((version)) of the ((HTTP)) ((protocol)) -it is using. - -(((status code)))The server's ((response)) will start with a version -as well, followed by the status of the response, first as a -three-digit status code and then as a human-readable string. - -[source,http] ----- -HTTP/1.1 200 OK ----- - -(((200 (HTTP status code))))(((error response)))(((404 (HTTP status -code))))Status codes starting with a 2 indicate that the request succeeded. -Codes starting with 4 mean there was something wrong with the -((request)). 404 is probably the most famous HTTP status code—it means -that the resource that was requested could not be found. Codes that -start with 5 mean an error happened on the ((server)) and the request -is not to blame. 
- -[[headers]] -(((HTTP)))The first line of a request or response may be followed by -any number of _((header))s_. These are lines in the form “name: value” -that specify extra information about the request or response. These -headers were part of the example ((response)): - ----- -Content-Length: 65585 -Content-Type: text/html -Last-Modified: Wed, 09 Apr 2014 10:48:09 GMT ----- - -(((Content-Length header)))(((Content-Type header)))(((Last-Modified -header)))This tells us the size and type of the response document. In -this case, it is an HTML document of 65,585 bytes. It also tells us when -that document was last modified. - -(((Host header)))(((domain)))For the most part, a client or server -decides which ((header))s to include in a ((request)) or ((response)), -though a few headers are required. For example, the `Host` header, -which specifies the hostname, should be included in a request -because a ((server)) might be serving multiple hostnames on a single -((IP address)), and without that header, the server won't know which host the -client is trying to talk to. - -(((GET method)))(((DELETE method)))(((PUT method)))(((POST -method)))(((body (HTTP))))After the headers, both requests and -responses may include a blank line followed by a _body_, which -contains the data being sent. `GET` and `DELETE` requests don't send -along any data, but `PUT` and `POST` requests do. -Similarly, some response types, such as error responses, do not -require a body. - -== Browsers and HTTP == - -(((HTTP)))As we saw in the example, a ((browser)) will make a request -when we enter a ((URL)) in its ((address bar)). When the resulting -HTML page references other files, such as ((image))s and JavaScript -((file))s, those are also fetched. - -(((parallelism)))A moderately complicated ((website)) can easily -include anywhere from 10 to 200 ((resource))s. 
To be able to -fetch those quickly, browsers will make several requests -simultaneously, rather than waiting for the responses one at a time. -(((GET method)))Such documents are always fetched using `GET` -((request))s. - -[[http_forms]] -HTML pages may include _((form))s_, which allow -the user to fill out information and send it to the server. This is an -example of a form: - -[source,text/html] ----- -
    -

    Name:

    -

    Message:

    -

    -
    ----- - -(((form)))(((method attribute)))(((GET method)))This code describes a form with two -((field))s: a small one asking for a name and a larger one to write a -message in. When you click the Send ((button)), the information in -those fields will be encoded into a _((query string))_. When the -`
    ` element's `method` attribute is `GET` (or is omitted), that -query string is tacked onto the `action` URL, and the browser makes a -`GET` request to that URL. - -[source,text/html] ----- -GET /example/message.html?name=Jean&message=Yes%3F HTTP/1.1 ----- - -(((ampersand character)))The start of a ((query string)) is indicated -by a ((question mark)). After that follow pairs of names and values, -corresponding to the `name` attribute on the form field elements and -the content of those elements, respectively. An ampersand character (`&`) is used to separate -the pairs. - -(((escaping,in URLs)))(((hexadecimal number)))(((percent -sign)))(((URL encoding)))(((encodeURIComponent -function)))(((decodeURIComponent function)))The actual message encoded -in the previous URL is “Yes?”, even though the question mark is replaced -by a strange code. Some characters in query strings must be -escaped. The question mark, represented as `%3F`, is one of those. -There seems to be an unwritten rule that every format needs its -own way of escaping characters. This one, called _URL -encoding_, uses a percent sign followed by two hexadecimal digits -that encode the character code. In this case, 3F, which is 63 in -decimal notation, is the code of a question mark character. JavaScript -provides the `encodeURIComponent` and `decodeURIComponent` functions -to encode and decode this format. - -[source,javascript] ----- -console.log(encodeURIComponent("Hello & goodbye")); -// → Hello%20%26%20goodbye -console.log(decodeURIComponent("Hello%20%26%20goodbye")); -// → Hello & goodbye ----- - -(((body (HTTP))))(((POST method)))If we change the `method` attribute -of the HTML form in the example we saw earlier to `POST`, the ((HTTP)) request made to submit the -((form)) will use the `POST` method and put the ((query string)) in -the body of the request, rather than adding it to the URL. 
- -[source,http] ----- -POST /example/message.html HTTP/1.1 -Content-length: 24 -Content-type: application/x-www-form-urlencoded - -name=Jean&message=Yes%3F ----- - -By convention, the `GET` method is used for requests that do not have -side effects, such as doing a search. Requests that change something on -the server, such as creating a new account or posting a message, should -be expressed with other methods, such as `POST`. Client-side software, -such as a browser, knows that it shouldn't blindly make `POST` -requests but will often implicitly make `GET` requests—for example, to -prefetch a resource it believes the user will soon need. - -The link:18_forms.html#forms[next chapter] will return to forms -and talk about how we can script them with JavaScript. - -[[xmlhttprequest]] -== XMLHttpRequest == - -(((capitalization)))(((XMLHttpRequest)))The ((interface)) through -which browser JavaScript can make HTTP requests is called -`XMLHttpRequest` (note the inconsistent capitalization). It was -designed by ((Microsoft)), for its ((Internet Explorer)) -((browser)), in the late 1990s. During this time, the ((XML)) file format -was _very_ popular in the world of ((business software))—a world where -Microsoft has always been at home. In fact, it was so popular that the -acronym XML was tacked onto the front of the name of an interface for -((HTTP)), which is in no way tied to XML. - -(((modularity)))(((interface,design)))The name isn't completely -nonsensical, though. The interface allows you to parse response documents as -XML if you want. Conflating two distinct concepts (making a request -and ((parsing)) the response) into a single thing is terrible design, -of course, but so it goes. - -When the `XMLHttpRequest` interface was added to Internet Explorer, it -allowed people to do things with JavaScript that had been very hard -before. For example, websites started showing lists of suggestions -when the user was typing something into a text field. 
The script would -send the text to the server over ((HTTP)) as the user typed. The ((server)), -which had some ((database)) of possible inputs, would -match the database entries against the partial input and send back possible -((completion))s to show the user. This was -considered spectacular—people were used to waiting for a full page reload -for every interaction with a website. - -(((compatibility)))(((Firefox)))(((XMLHttpRequest)))The other -significant browser at that time, ((Mozilla)) (later Firefox), did not -want to be left behind. To allow people to do similarly neat things in -_its_ browser, Mozilla copied the interface, including the bogus name. -The next generation of ((browser))s followed this example, and today -`XMLHttpRequest` is a de facto standard ((interface)). - -== Sending a request == - -(((open method)))(((send method)))(((XMLHttpRequest)))To make a simple -((request)), we create a request object with the `XMLHttpRequest` -constructor and call its `open` and `send` methods. - -// test: trim - -[source,javascript] ----- -var req = new XMLHttpRequest(); -req.open("GET", "example/data.txt", false); -req.send(null); -console.log(req.responseText); -// → This is the content of data.txt ----- - -(((path,URL)))(((open method)))(((relative URL)))(((slash character)))The `open` -method configures the request. In this case, we choose to make a `GET` -request for the _example/data.txt_ file. ((URL))s that don't start -with a protocol name (such as _http:_) are relative, which means that -they are interpreted relative to the current document. When they start -with a slash (/), they replace the current path, which is the part after the -server name. When they do not, the part of the current path up to -and including its last slash character is put in front of the relative -URL. - -(((send method)))(((GET method)))(((body (HTTP))))(((responseText -property)))After opening the request, we can send it with the `send` -method. 
The argument to send is the request body. For `GET` requests, -we can pass `null`. If the third argument to `open` was `false`, `send` -will return only after the response to our request was received. We -can read the request object's `responseText` property to get the -response body. - -(((status property)))(((statusText -property)))(((header)))(((getResponseHeader method)))The other -information included in the response can also be extracted from this -object. The ((status code)) is accessible through the `status` -property, and the human-readable status text is accessible through `statusText`. -Headers can be read with `getResponseHeader`. - -// test: no - -[source,javascript] ----- -var req = new XMLHttpRequest(); -req.open("GET", "example/data.txt", false); -req.send(null); -console.log(req.status, req.statusText); -// → 200 OK -console.log(req.getResponseHeader("content-type")); -// → text/plain ----- - -(((case sensitivity)))(((capitalization)))Header names are -case-insensitive. They are usually written with a capital letter at -the start of each word, such as “Content-Type”, but “content-type” and -“cOnTeNt-TyPe” refer to the same header. - -(((Host header)))(((setRequestHeader method)))The browser will -automatically add some request ((header))s, such as “Host” and those -needed for the server to figure out the size of the body. But you can -add your own headers with the `setRequestHeader` method. This is -needed only for advanced uses and requires the cooperation of the -((server)) you are talking to—a server is free to ignore headers it -does not know how to handle. - -== Asynchronous Requests == - -(((XMLHttpRequest)))(((event handling)))(((blocking)))(((synchronous -I/O)))(((responseText property)))(((send method)))In the examples we -saw, the request has finished when the call to `send` returns. This is -convenient because it means properties such as `responseText` are -available immediately. 
But it also means that our program is suspended -as long as the ((browser)) and server are communicating. When the -((connection)) is bad, the server is slow, or the file is big, that -might take quite a while. Worse, because no event handlers can fire -while our program is suspended, the whole document will become -unresponsive. - -(((XMLHttpRequest)))(((open method)))(((asynchronous I/O)))If we pass -`true` as the third argument to `open`, the request is _asynchronous_. -This means that when we call `send`, the only thing that happens right -away is that the request is scheduled to be sent. Our program can -continue, and the browser will take care of the sending and receiving -of data in the background. - -But as long as the request is running, we won't be able to access the -response. We need a mechanism that will notify us when the data is -available. - -(((event handling)))(((load event)))For this, we must listen for the -`"load"` event on the request object. - -[source,javascript] ----- -var req = new XMLHttpRequest(); -req.open("GET", "example/data.txt", true); -req.addEventListener("load", function() { - console.log("Done:", req.status); -}); -req.send(null); ----- - -(((asynchronous programming)))(((callback function)))Just like the use -of `requestAnimationFrame` in link:15_game.html#game[Chapter 15], this -forces us to use an asynchronous style of programming, wrapping the -things that have to be done after the request in a function and -arranging for that to be called at the appropriate time. We will come -back to this link:17_http.html#promises[later]. - -== Fetching XML Data == - -(((documentElement property)))(((responseXML property)))When the -resource retrieved by an `XMLHttpRequest` object is an ((XML)) -document, the object's `responseXML` property will hold a parsed -representation of this document. 
This representation works much like -the ((DOM)) discussed in link:13_dom.html#dom[Chapter 13], except that -it doesn't have HTML-specific functionality like the `style` property. -The object that `responseXML` holds corresponds to the `document` -object. Its `documentElement` property refers to the outer tag of the -XML document. In the following document (_example/fruit.xml_), that -would be the `<fruits>` tag: - -[source,application/xml] ----- - - - - - ----- - -We can retrieve such a file like this: - -// test: no - -[source,javascript] ----- -var req = new XMLHttpRequest(); -req.open("GET", "example/fruit.xml", false); -req.send(null); -console.log(req.responseXML.querySelectorAll("fruit").length); -// → 3 ----- - -(((data format)))XML documents can be used to exchange structured -information with the server. Their form—tags nested inside other -tags—lends itself well to storing most types of data, or at least -better than flat text files. The DOM interface is rather clumsy for -extracting information, though, and ((XML)) documents tend to be -verbose. It is often a better idea to communicate using ((JSON)) data, -which is easier to read and write, both for programs and for humans. - -[source,javascript] ----- -var req = new XMLHttpRequest(); -req.open("GET", "example/fruit.json", false); -req.send(null); -console.log(JSON.parse(req.responseText)); -// → {banana: "yellow", lemon: "yellow", cherry: "red"} ----- - -[[http_sandbox]] -== HTTP sandboxing == - -(((sandbox)))Making ((HTTP)) requests in web page scripts once -again raises concerns about ((security)). The person who controls the -script might not have the same interests as the person on whose -computer it is running. More specifically, if I visit _themafia.org_, -I do not want its scripts to be able to make a request to -_mybank.com_, using identifying information from my ((browser)), with -instructions to transfer all my money to some random ((mafia)) -account. 
- -It is possible for ((website))s to protect themselves against such -((attack))s, but that requires effort, and many websites fail to do it. -For this reason, browsers protect us by disallowing scripts to make -HTTP requests to other _((domain))s_ (names such as _themafia.org_ and -_mybank.com_). - -(((Access-Control-Allow-Origin header)))(((cross-domain request)))This -can be an annoying problem when building systems that want to access -several domains for legitimate reasons. Fortunately, ((server))s can -include a ((header)) like this in their ((response)) to explicitly -indicate to browsers that it is okay for the request to come from -other domains: - ----- -Access-Control-Allow-Origin: * ----- - -== Abstracting requests == - -(((HTTP)))(((XMLHttpRequest)))(((backgroundReadFile function)))In -link:10_modules.html#amd[Chapter 10], in our implementation of the AMD -module system, we used a hypothetical function called -`backgroundReadFile`. It took a filename and a function and called -that function with the contents of the file when it had finished -fetching it. Here's a simple implementation of that function: - -// include_code - -[source,javascript] ----- -function backgroundReadFile(url, callback) { - var req = new XMLHttpRequest(); - req.open("GET", url, true); - req.addEventListener("load", function() { - if (req.status < 400) - callback(req.responseText); - }); - req.send(null); -} ----- - -(((XMLHttpRequest)))This simple ((abstraction)) makes it easier to use -`XMLHttpRequest` for simple `GET` requests. If you are writing a -program that has to make HTTP requests, it is a good idea to use a -helper function so that you don't end up repeating the ugly -`XMLHttpRequest` pattern all through your code. - -(((function,as value)))(((callback function)))The function argument's -name, `callback`, is a term that is often used to describe functions -like this. A callback function is given to other code to provide that -code with a way to “call us back” later. 
- -(((library)))It is not hard to write an HTTP utility function, tailored to what your -application is doing. The previous one does only `GET` requests and -doesn't give us control over the headers or the request body. You -could write another variant for `POST` requests or a more generic one -that supports various kinds of requests. Many JavaScript libraries -also provide wrappers for `XMLHttpRequest`. - -(((user experience)))(((error response)))The main problem with the previous -wrapper is its handling of ((failure)). When the request returns -a ((status code)) that indicates an error (400 and up), it does -nothing. This might be okay, in some circumstances, but imagine we put -a “loading” indicator on the page to indicate that we are fetching -information. If the request fails because the server crashed or the -((connection)) is briefly interrupted, the page will just sit there, -misleadingly looking like it is doing something. The user will wait -for a while, get impatient, and consider the site uselessly flaky. - -We should also have an option to be notified when the request fails -so that we can take appropriate action. For example, we could remove the -“loading” message and inform the user that something went wrong. - -(((exception handling)))(((callback function)))(((error -handling)))(((asynchronous programming)))(((try -keyword)))(((stack)))Error handling in asynchronous code is even -trickier than error handling in synchronous code. Because we often need -to defer part of our work, putting it in a callback function, the -scope of a `try` block becomes meaningless. In the following code, the -exception will _not_ be caught because the call to -`backgroundReadFile` returns immediately. Control then leaves the -`try` block, and the function it was given won't be called until -later. 
- -// test: no - -[source,javascript] ----- -try { - backgroundReadFile("example/data.txt", function(text) { - if (text != "expected") - throw new Error("That was unexpected"); - }); -} catch (e) { - console.log("Hello from the catch block"); -} ----- - -[[getURL]] -(((HTTP)))(((getURL function)))(((exception)))To handle failing -requests, we have to allow an additional function to be passed to our -wrapper and call that when a request goes wrong. Alternatively, we -can use the convention that if the request fails, an additional -argument describing the problem is passed to the regular callback -function. Here's an example: - -// include_code - -[source,javascript] ----- -function getURL(url, callback) { - var req = new XMLHttpRequest(); - req.open("GET", url, true); - req.addEventListener("load", function() { - if (req.status < 400) - callback(req.responseText); - else - callback(null, new Error("Request failed: " + - req.statusText)); - }); - req.addEventListener("error", function() { - callback(null, new Error("Network error")); - }); - req.send(null); -} ----- - -(((error event)))We have added a handler for the `"error"` event, -which will be signaled when the request fails entirely. We also call -the ((callback function)) with an error argument when the request -completes with a ((status code)) that indicates an error. - -Code using `getURL` must then check whether an error was given and, if -it finds one, handle it. - -[source,javascript] ----- -getURL("data/nonsense.txt", function(content, error) { - if (error != null) - console.log("Failed to fetch nonsense.txt: " + error); - else - console.log("nonsense.txt: " + content); -}); ----- - -(((uncaught exception)))(((exception handling)))(((try keyword)))This -does not help when it comes to exceptions. 
When chaining several -asynchronous actions together, an exception at any point of the chain -will still (unless you wrap each handling function in its own -`try/catch` block) land at the top level and abort your chain of -actions. - -[[promises]] -== Promises == - -(((promise)))(((asynchronous programming)))(((callback -function)))(((readability)))(((uncaught exception)))For complicated -projects, writing asynchronous code in plain callback style is hard to -do correctly. It is easy to forget to check for an error or to allow -an unexpected exception to cut the program short in a crude way. -Additionally, arranging for correct error handling when the error has -to flow through multiple callback functions and `catch` blocks is -tedious. - -(((future)))(((ECMAScript 6)))There have been a lot of attempts to -solve this with extra abstractions. One of the more successful ones is -called _promises_. Promises wrap an asynchronous action in an object, -which can be passed around and told to do certain things when the -action finishes or fails. This interface is set to become part of the next -version of the JavaScript language but can already be used as a -library. - -The ((interface)) for promises isn't entirely intuitive, but it is -powerful. This chapter will only roughly describe it. You can find a more thorough -treatment at -https://www.promisejs.org/[_www.promisejs.org_]. - -(((Promise constructor)))To create a promise object, we call the -`Promise` constructor, giving it a function that initializes the -asynchronous action. The constructor calls that function, passing it -two arguments, which are themselves functions. The first should be -called when the action finishes successfully, and the second should be called when it -fails. - -(((HTTP)))(((get function)))Once again, here is our wrapper for `GET` -requests, this time returning a promise. We'll simply call it `get` -this time. 
- -// include_code - -[source,javascript] ----- -function get(url) { - return new Promise(function(succeed, fail) { - var req = new XMLHttpRequest(); - req.open("GET", url, true); - req.addEventListener("load", function() { - if (req.status < 400) - succeed(req.responseText); - else - fail(new Error("Request failed: " + req.statusText)); - }); - req.addEventListener("error", function() { - fail(new Error("Network error")); - }); - req.send(null); - }); -} ----- - -Note that the ((interface)) to the function itself is now a lot -simpler. You give it a URL, and it returns a ((promise)). That promise -acts as a _handle_ to the request's outcome. It has a `then` method -that you can call with two functions: one to handle success and one -to handle failure. - -[source,javascript] ----- -get("example/data.txt").then(function(text) { - console.log("data.txt: " + text); -}, function(error) { - console.log("Failed to fetch data.txt: " + error); -}); ----- - -(((chaining)))So far, this is just another way to express the same -thing we already expressed. It is only when you need to chain -actions together that promises make a significant difference. - -(((then method)))Calling `then` produces a new ((promise)), whose -result (the value passed to success handlers) depends on the return -value of the first function we passed to `then`. This function may -return another promise to indicate that more asynchronous work is -being done. In this case, the promise returned by `then` itself will -wait for the promise returned by the handler function, succeeding or -failing with the same value when it is resolved. When the handler -function returns a nonpromise value, the promise returned by `then` -immediately succeeds with that value as its result. - -(((then method)))(((chaining)))This means you can use `then` to -transform the result of a promise. 
For example, this returns a promise -whose result is the content of the given URL, parsed as ((JSON)): - -// include_code - -[source,javascript] ----- -function getJSON(url) { - return get(url).then(JSON.parse); -} ----- - -(((error handling)))That last call to `then` did not specify a failure -handler. This is allowed. The error will be passed to the promise -returned by `then`, which is exactly what we want—`getJSON` does not -know what to do when something goes wrong, but hopefully its caller -does. - -As an example that shows the use of ((promise))s, we will build a -program that fetches a number of JSON files from the server and, -while it is doing that, shows the word _loading_. The JSON files -contain information about people, with links to files that represent -other people in properties such as `father`, `mother`, or `spouse`. - -(((error message)))(((JSON)))We want to get the name of the mother of -the spouse of _example/bert.json_. And if something goes wrong, we -want to remove the _loading_ text and show an error message instead. -Here is how that might be done with ((promise))s: - -[source,text/html] ----- - ----- - -(((error handling)))(((catch method)))(((then -method)))(((readability)))(((program size)))The resulting program is -relatively compact and readable. The `catch` method is similar to -`then`, except that it only expects a failure handler and will pass -through the result unmodified in case of success. Much like with the -`catch` clause for the `try` statement, control will continue as -normal after the failure is caught. That way, the final `then`, which -removes the loading message, is always executed, even if something -went wrong. - -(((asynchronous programming)))(((domain-specific language)))You can -think of the promise interface as implementing its own language for -asynchronous ((control flow)). 
The extra method calls and function -expressions needed to achieve this make the code look somewhat -awkward but not remotely as awkward as it would look if we took care -of all the error handling ourselves. - -== Appreciating HTTP == - -(((client)))(((HTTP)))When building a system that requires -((communication)) between a JavaScript program running in the -((browser)) (client-side) and a program on a ((server)) (server-side), -there are several different ways to model this communication. - -(((network)))(((abstraction)))A commonly used model is that of -_((remote procedure call))s_. In this model, communication follows the -patterns of normal function calls, except that the function is -actually running on another machine. Calling it involves making a -request to the server that includes the function's name and arguments. -The response to that request contains the returned value. - -When thinking in terms of remote procedure calls, HTTP is just a -vehicle for communication, and you will most likely write an -abstraction layer that hides it entirely. - -(((media type)))(((document format)))Another approach is to build your -communication around the concept of ((resource))s and ((HTTP)) -((method))s. Instead of a remote procedure called `addUser`, you use a -`PUT` request to `/users/larry`. Instead of encoding that user's -properties in function arguments, you define a document format or use -an existing format that represents a user. The body of the `PUT` request -to create a new resource is then simply such a document. A resource is -fetched by making a `GET` -request to the resource's URL (for example, `/user/larry`), which -returns the document representing the resource. - -This second approach makes it easier to use some of the features that -HTTP provides, such as support for caching resources (keeping a copy -on the client side). It can also help the coherence of your interface -since resources are easier to reason about than a jumble of functions. 
- -== Security and HTTPS == - -(((man-in-the-middle)))(((security)))(((HTTPS)))Data traveling over -the Internet tends to follow a long, dangerous road. To get -to its destination, it must hop through anything from coffee-shop Wi-Fi -((network))s to networks controlled by various companies and states. -At any point along its route it may be inspected or even modified. - -(((tampering)))If it is important that something remain secret, -such as the ((password)) to your ((email)) account, or that it arrive -at its destination unmodified, such as the account number you transfer -money to from your bank's website, plain HTTP is not good enough. - -indexsee:[Secure HTTP,HTTPS] -(((cryptography)))(((encryption)))The secure ((HTTP)) protocol, whose -((URL))s start with _https://_, wraps HTTP traffic in a way that makes -it harder to read and tamper with. First, the client verifies that the -server is who it claims to be by requiring that server to prove that it has a -cryptographic ((certificate)) issued by a certificate authority that -the ((browser)) recognizes. Next, all data going over the -((connection)) is encrypted in a way that should prevent eavesdropping -and tampering. - -Thus, when it works right, ((HTTPS)) prevents both -someone impersonating the website you were trying to talk to and -someone snooping on your communication. It is not -perfect, and there have been various incidents where HTTPS failed because of -forged or stolen certificates and broken software. Still, plain -HTTP is trivial to mess with, whereas breaking HTTPS requires the kind -of effort that only states or sophisticated criminal organizations can -hope to make. - -== Summary == - -In this chapter, we saw that HTTP is a protocol for accessing -resources over the Internet. A _client_ sends a request, which -contains a method (usually `GET`) and a path that identifies a -resource.
The _server_ then decides what to do with the request and -responds with a status code and a response body. Both requests and -responses may contain headers that provide additional information. - -Browsers make `GET` requests to fetch the resources needed to display -a web page. A web page may also contain forms, which allow information -entered by the user to be sent along in the request made when the form -is submitted. You will learn more about that in the link:18_forms.html#forms[next -chapter]. - -The interface through which browser JavaScript can make HTTP requests -is called `XMLHttpRequest`. You can usually ignore the “XML” part of -that name (but you still have to type it). There are two ways in which -it can be used—synchronous, which blocks everything until the request -finishes, and asynchronous, which requires an event handler to notice -that the response came in. In almost all cases, asynchronous is -preferable. Making a request looks like this: - -[source,javascript] ----- -var req = new XMLHttpRequest(); -req.open("GET", "example/data.txt", true); -req.addEventListener("load", function() { - console.log(req.status); -}); -req.send(null); ----- - -Asynchronous programming is tricky. _Promises_ are an interface that -makes it slightly easier by helping route error conditions and -exceptions to the right handler and by abstracting away some of the more -repetitive and error-prone elements in this style of programming. - -== Exercises == - -[[exercise_accept]] -=== Content negotiation === - -(((Accept header)))(((media type)))(((document format)))(((content -negotiation (exercise))))One of the things that HTTP can do, but that -we have not discussed in this chapter, is called _content -negotiation_. The `Accept` header for a request can be used to tell -the server what type of document the client would like to get. 
Many -servers ignore this header, but when a server knows of various ways to -encode a resource, it can look at this header and send the one that -the client prefers. - -(((media type)))(((MIME type)))The URL -http://eloquentjavascript.net/author[_eloquentjavascript.net/author_] -is configured to respond with either plaintext, HTML, or JSON, -depending on what the client asks for. These formats are identified by -the standardized _media types_ `text/plain`, `text/html`, and -`application/json`. - -(((setRequestHeader method)))(((XMLHttpRequest)))Send requests to -fetch all three formats of this resource. Use the `setRequestHeader` -method of your `XMLHttpRequest` object to set the header named `Accept` -to one of the media types given earlier. Make sure you set the header -_after_ calling `open` but before calling `send`. - -Finally, try asking for the media type `application/rainbows+unicorns` -and see what happens. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -// Your code here. ----- - -endif::interactive_target[] - -!!hint!! - -(((synchronous I/O)))(((content negotiation (exercise))))See the -various examples of using an `XMLHttpRequest` in this chapter for an -example of the method calls involved in making a request. You can use -a synchronous request (by setting the third parameter to `open` to -`false`) if you want. - -(((406 (HTTP status code))))(((Accept header)))Asking for a bogus -media type will return a response with code 406, “Not acceptable”, -which is the code a server should return when it can't fulfill the -`Accept` header. - -!!hint!! - -=== Waiting for multiple promises === - -(((all function)))(((Promise constructor)))The `Promise` constructor -has an `all` method that, given an array of ((promise))s, returns a -promise that waits for all of the promises in the array to finish. It then succeeds, -yielding an array of result values. 
If any of the promises in -the array fail, the promise returned by `all` fails too (with the -failure value from the failing promise). - -Try to implement something like this yourself as a regular function -called `all`. - -Note that after a promise is resolved (has succeeded or failed), it -can't succeed or fail again, and further calls to the functions that -resolve it are ignored. This can simplify the way you handle failure -of your promise. - -ifdef::interactive_target[] - -// test: no - -[source,javascript] ----- -function all(promises) { - return new Promise(function(success, fail) { - // Your code here. - }); -} - -// Test code. -all([]).then(function(array) { - console.log("This should be []:", array); -}); -function soon(val) { - return new Promise(function(success) { - setTimeout(function() { success(val); }, - Math.random() * 500); - }); -} -all([soon(1), soon(2), soon(3)]).then(function(array) { - console.log("This should be [1, 2, 3]:", array); -}); -function fail() { - return new Promise(function(success, fail) { - fail(new Error("boom")); - }); -} -all([soon(1), fail(), soon(3)]).then(function(array) { - console.log("We should not get here"); -}, function(error) { - if (error.message != "boom") - console.log("Unexpected failure:", error); -}); ----- - -endif::interactive_target[] - -!!hint!! - -(((all function)))(((Promise constructor)))(((then method)))The -function passed to the `Promise` constructor will have to call `then` -on each of the promises in the given array. When one of them succeeds, -two things need to happen. The resulting value needs to be stored in -the correct position of a result array, and we must check whether this -was the last pending ((promise)) and finish our own promise if it -was. - -(((counter variable)))The latter can be done with a counter, which is -initialized to the length of the input array and from which we subtract -1 every time a promise succeeds. When it reaches 0, we are -done. 
Make sure you take the situation where the input array is empty -(and thus no promise will ever resolve) into account. - -Handling failure requires some thought but turns out to be extremely -simple. Just pass the failure function of the wrapping promise to each -of the promises in the array so that a failure in one of them -triggers the failure of the whole wrapper. - -!!hint!! diff --git a/18_forms.txt b/18_forms.txt deleted file mode 100644 index 283190ed6..000000000 --- a/18_forms.txt +++ /dev/null @@ -1,966 +0,0 @@ -:chap_num: 18 -:prev_link: 17_http -:next_link: 19_paint -:load_files: ["code/promise.js"] - -= Forms and Form Fields = - -[chapterquote="true"] -[quote,Mephistopheles,in Goethe's Faust] -____ -I shall this very day, at Doctor's feast, + -My bounden service duly pay thee. + -But one thing!—For insurance’ sake, I pray thee, + -Grant me a line or two, at least. -____ - -(((Goethe+++,+++ Johann Wolfgang von)))(((Mephistopheles)))(((page -reload)))(((form)))Forms were introduced briefly in the -link:17_http.html#http_forms[previous chapter] as a way to -_((submit))_ information provided by the user over ((HTTP)). They were -designed for a pre-JavaScript Web, assuming that interaction with the -server always happens by navigating to a new page. - -But their elements are part of the ((DOM)) like the rest of the page, -and the DOM elements that represent form ((field))s support a number -of properties and events that are not present on other elements. These -make it possible to inspect and control such input fields with JavaScript programs -and do things such as adding functionality to a traditional form or using forms -and fields as building blocks in a JavaScript application. - -== Fields == - -(((form (HTML tag))))A web form consists of any number of input -((field))s grouped in a `<form>` tag. HTML allows a number of -different styles of fields, ranging from simple on/off checkboxes to -drop-down menus and fields for text input.
This book won't try to -comprehensively discuss all field types, but we will start with a rough -overview. - -(((input (HTML tag))))(((type attribute)))A lot of field types use the -`<input>` tag. This tag's `type` attribute is used to select the -field's style. These are some commonly used `<input>` types: - -[cols="1,5"] -|==== -|`text` |A single-line ((text field)) -|`password`|(((password field)))Same as `text` but hides the text that is typed -|`checkbox`|(((checkbox)))An on/off switch -|`radio` |(((radio button)))(Part of) a ((multiple-choice)) field -|`file` |(((file field)))Allows the user to choose a file from their computer -|==== - -(((value attribute)))(((checked attribute)))(((form (HTML tag))))Form -fields do not necessarily have to appear in a `<form>` tag. You can -put them anywhere in a page. Such fields cannot be ((submit))ted -(only a form as a whole can), but when responding to input with -JavaScript, we often do not want to submit our fields normally anyway. - -[source,text/html] ----- -

    (text)

    -

    (password)

    -

    (checkbox)

    -

    - - (radio)

    -

    (file)

    ----- - -ifdef::book_target[] - -The fields created with this HTML code look like this: - -image::img/form_fields.png[alt="Various types of input tags",width="4cm"] - -endif::book_target[] - -The JavaScript interface for such elements differs with the type of -the element. We'll go over each of them later in the chapter. - -(((textarea (HTML tag))))(((text field)))Multiline text fields have -their own tag, `` closing tag and uses the text -between those two, instead of using its `value` attribute, as starting -text. - -[source,text/html] ----- - ----- - -(((select (HTML tag))))(((option (HTML tag))))(((multiple -choice)))(((drop-down menu)))Finally, the ` - - - - ----- - -ifdef::book_target[] - -Such a field looks like this: - -image::img/form_select.png[alt="A select field",width="4cm"] - -endif::book_target[] - -(((change event)))Whenever the value of a form field changes, it fires -a `"change"` event. - -== Focus == - -indexsee:[keyboard focus,focus] -(((keyboard)))(((focus)))Unlike most elements in an HTML document, -form fields can get _keyboard ((focus))_. When clicked—or activated in -some other way—they become the currently active element, the main -recipient of keyboard ((input)). - -(((option (HTML tag))))(((select (HTML tag))))If a document has a -((text field)), text typed will end up in there only when the field is -focused. Other fields respond differently to keyboard events. For -example, a ` - ----- - -(((autofocus attribute)))For some pages, the user is expected to -want to interact with a form field immediately. -JavaScript can be used to ((focus)) this field when the document is -loaded, but HTML also provides the `autofocus` attribute, which -produces the same effect but lets the browser know what we are trying -to achieve. This makes it possible for the browser to disable the -behavior when it is not appropriate, such as when the user has focused -something else. 
- -[source,text/html] -[focus="yes"] ----- - ----- - -(((tab key)))(((keyboard)))(((tabindex attribute)))(((a (HTML -tag))))Browsers traditionally also allow the user to move the focus -through the document by pressing the Tab key. We can influence the -order in which elements receive focus with the `tabindex` attribute. -The following example document will let focus jump from the text input to -the OK button, rather than going through the help link first: - -[source,text/html] -[focus="yes"] ----- - (help) - ----- - -(((tabindex attribute)))By default, most types of HTML elements cannot -be focused. But you can add a `tabindex` attribute to any element, -which will make it focusable. - -== Disabled fields == - -(((disabled attribute)))All ((form)) ((field))s can be _disabled_ -through their `disabled` attribute, which also exists as a property on -the element's DOM object. - -[source,text/html] ----- - - ----- - -Disabled fields cannot be ((focus))ed or changed, and unlike active -fields, they usually look gray and faded. - -ifdef::book_target[] - -image::img/button_disabled.png[alt="A disabled button",width="3cm"] - -endif::book_target[] - -(((user experience)))(((asynchronous programming)))When a program is -in the process of handling an action caused by some ((button)) or other control, -which might require communication with the server and thus take a -while, it can be a good idea to -disable the control until the action finishes. That way, when the user -gets impatient and clicks it again, they don't accidentally repeat -their action. - -== The form as a whole == - -(((array-like object)))(((form (HTML tag))))(((form -property)))(((elements property)))When a ((field)) is contained in a -`` element, its DOM element will have a property `form` linking -back to the form's DOM element. The `` element, in turn, has a -property called `elements` that contains an array-like collection of the fields -inside it. 
- -(((elements property)))(((name attribute)))The `name` attribute of a -form field determines the way its value will be identified when the -form is ((submit))ted. It can also be used as a property name when -accessing the form's `elements` property, which acts both as an -array-like object (accessible by number) and a ((map)) (accessible by -name). - -[source,text/html] ----- - - Name:
    - Password:
    - - - ----- - -(((button (HTML tag))))(((type attribute)))(((submit)))(((Enter -key)))A button with a `type` attribute of `submit` will, when pressed, -cause the form to be submitted. Pressing Enter when a form field is -focused has the same effect. - -(((submit event)))(((event handling)))(((preventDefault -method)))(((page reload)))(((GET method)))(((POST method)))Submitting -a ((form)) normally means that the -((browser)) navigates to the page indicated by the form's `action` -attribute, using either a `GET` or a `POST` ((request)). But before -that happens, a `"submit"` event is fired. This event can be handled -by JavaScript, and the handler can prevent the default behavior by -calling `preventDefault` on the event object. - -[source,text/html] ----- -
    - Value: - -
    - ----- - -(((submit event)))(((validation)))(((XMLHttpRequest)))Intercepting -`"submit"` events in JavaScript has various uses. We can write code to -verify that the values the user entered make sense and immediately -show an error message instead of submitting the form when they don't. -Or we can disable the regular way of submitting the form entirely, as -in the previous example, and have our program handle the input, possibly -using `XMLHttpRequest` to send it over to a server without reloading -the page. - -== Text fields == - -(((value attribute)))(((input (HTML tag))))(((text field)))(((textarea -(HTML tag))))Fields created by `` tags with a type of `text` or -`password`, as well as `textarea` tags, share a common ((interface)). -Their ((DOM)) elements have a `value` property that holds their -current content as a string value. Setting this property to another string -changes the field's content. - -(((selectionStart property)))(((selectionEnd property)))The -`selectionStart` and `selectionEnd` properties of ((text field))s give -us information about the ((cursor)) and ((selection)) in the ((text)). -When nothing is selected, these two properties hold the same number, -indicating the position of the cursor. For example, 0 indicates the -start of the text, and 10 indicates the cursor is after the 10^th^ ((character)). -When part of the field is selected, the two properties will differ, giving us the -start and end of the selected text. Like `value`, these properties may -also be written to. - -(((Khasekhemwy)))(((textarea (HTML -tag))))(((keyboard)))(((event handling)))As an example, imagine you -are writing an article about Khasekhemwy but have some -trouble spelling his name. 
The following code wires up a ` - ----- - -(((replaceSelection function)))(((text field)))The `replaceSelection` -function replaces the currently selected part of a text field's -content with the given word and then moves the ((cursor)) after that -word so that the user can continue typing. - -(((change event)))(((input event)))The `"change"` event for a ((text -field)) does not fire every time something is typed. Rather, it -fires when the field loses ((focus)) after its content was changed. -To respond immediately to changes in a text field, you should register -a handler for the `"input"` event instead, which fires for every -time the user types a character, deletes text, or otherwise manipulates -the field's content. - -The following example shows a text field and a counter showing the current -length of the text entered: - -[source,text/html] ----- - length: 0 - ----- - -== Checkboxes and radio buttons == - -(((input (HTML tag))))(((checked attribute)))A ((checkbox)) field is a -simple binary toggle. Its value can be extracted or changed through -its `checked` property, which holds a Boolean value. - -[source,text/html] ----- - - - ----- - -(((for attribute)))(((id attribute)))(((focus)))(((label (HTML -tag))))(((labeling)))The `