pull latest readability-php via composer

master
Andrew Dolgov 4 years ago
parent 1baf8c5217
commit cf1ede0ba8

@ -1,4 +1,6 @@
<?php
require "vendor/autoload.php";
use andreskrey\Readability\Readability;
use andreskrey\Readability\Configuration;

@ -0,0 +1,3 @@
coverage_clover: test/clover.xml
json_path: test/coveralls-upload.json
service_name: travis-ci

@ -0,0 +1,4 @@
.idea/
vendor
composer.lock
/test.*

@ -0,0 +1,13 @@
preset: recommended
enabled:
- concat_with_spaces
- unalign_double_arrow
disabled:
- concat_without_spaces
- align_double_arrow
- simplified_null_return
- cast_spaces
- trailing_comma_in_multiline_array
- phpdoc_align

@ -0,0 +1,48 @@
language: bash
services: docker
env:
matrix:
- PHP_VERSION=7.3 LIBXML_VERSION=2.9.9
- PHP_VERSION=7.3 LIBXML_VERSION=2.9.8
- PHP_VERSION=7.3 LIBXML_VERSION=2.9.7
- PHP_VERSION=7.3 LIBXML_VERSION=2.9.6
- PHP_VERSION=7.3 LIBXML_VERSION=2.9.5
- PHP_VERSION=7.3 LIBXML_VERSION=2.9.4
- PHP_VERSION=7.2 LIBXML_VERSION=2.9.9
- PHP_VERSION=7.2 LIBXML_VERSION=2.9.8
- PHP_VERSION=7.2 LIBXML_VERSION=2.9.7
- PHP_VERSION=7.2 LIBXML_VERSION=2.9.6
- PHP_VERSION=7.2 LIBXML_VERSION=2.9.5
- PHP_VERSION=7.2 LIBXML_VERSION=2.9.4
- PHP_VERSION=7.1 LIBXML_VERSION=2.9.9
- PHP_VERSION=7.1 LIBXML_VERSION=2.9.8
- PHP_VERSION=7.1 LIBXML_VERSION=2.9.7
- PHP_VERSION=7.1 LIBXML_VERSION=2.9.6
- PHP_VERSION=7.1 LIBXML_VERSION=2.9.5
- PHP_VERSION=7.1 LIBXML_VERSION=2.9.4
- PHP_VERSION=7.0 LIBXML_VERSION=2.9.9
- PHP_VERSION=7.0 LIBXML_VERSION=2.9.8
- PHP_VERSION=7.0 LIBXML_VERSION=2.9.7
- PHP_VERSION=7.0 LIBXML_VERSION=2.9.6
- PHP_VERSION=7.0 LIBXML_VERSION=2.9.5
- PHP_VERSION=7.0 LIBXML_VERSION=2.9.4
matrix:
allow_failures:
- env: LIBXML_VERSION=2.9.9
- env: LIBXML_VERSION=2.9.8
- env: LIBXML_VERSION=2.9.7
- env: LIBXML_VERSION=2.9.6
- env: LIBXML_VERSION=2.9.5
install:
- docker run --rm --volume $PWD:/app --workdir="/app" composer install
script:
- docker build --build-arg PHP_VERSION=${PHP_VERSION} --build-arg LIBXML_VERSION=${LIBXML_VERSION} -t travis-build - < ./docker/php/Dockerfile
- docker run --volume $PWD:/app --workdir="/app" travis-build php ./vendor/bin/phpunit --coverage-clover /app/test/clover.xml
after_script:
- docker run --volume $PWD:/app --workdir="/app" composer require php-coveralls/php-coveralls:^2.0
- docker run --volume $PWD:/app --workdir="/app" --env TRAVIS=${TRAVIS} --env TRAVIS_JOB_ID=${TRAVIS_JOB_ID} travis-build php ./vendor/php-coveralls/php-coveralls/bin/php-coveralls -v

@ -0,0 +1,13 @@
# Authors
Readability.php developed by **Andres Rey**.
Based on Arc90's readability.js (1.7.1) script available at: http://code.google.com/p/arc90labs-readability.
Copyright (c) 2010 Arc90 Inc
The AUTHORS/Contributors are (and/or have been):
* Andres Rey
* Sergiy Lavryk
* Pedro Amorim
* Malu Decks

@ -0,0 +1,145 @@
# Change Log
All notable changes to this project will be documented in this file.
## Unreleased
## [v2.1.0](https://github.com/andreskrey/readability.php/releases/tag/v2.1.0)
- Avoid overwriting extracted metadata with similarly named keys (like `og:image` and `og:image:width`)
- Imported new `getSiteName()` feature from JS version as of [21 Dec 2018](https://github.com/mozilla/readability/pull/504)
- Added getFirstElementChild function to NodeTrait + test case (Issue #83)
- Reworked the test suite to use TestPage objects and give more hints about what failed
- Removed getWordThreshold and setWordThreshold configuration functions
- Added NodeUtility::filterTextNodes and deprecated NodeTrait getChildren()
- Added new DOMNodeList fake class that mimics the original DOMNodeList class but allows adding new nodes to the list
- Added new Dockerfiles that pull different versions of PHP and libxml. Now we are supporting 4 versions of PHP and 6 versions of libxml!
## [v2.0.1](https://github.com/andreskrey/readability.php/releases/tag/v2.0.1)
- Fixed small issue that prevented the main image from showing up in the results
## [v2.0.0](https://github.com/andreskrey/readability.php/releases/tag/v2.0.0)
- [BREAKING CHANGE] Bumped the minimum supported version of PHP to 7.0
- Clean `<aside>` tags during `prepArticle()`.
- Merged PR #58: Fix notice non-object on $parentOfTopCandidate for tumblr.com
- Fixed issue #63: Division by zero
- Housekeeping:
- Removed $parseSuccessful flag that wasn't needed anymore
- Rename wordThreshold to charThreshold and throw deprecation notices. WordThreshold will be removed in version 3.0.
- Added "-ad-" as unlikely candidate
- Added Docker containers with PHP 7.0, 7.1, and 7.2 and makefile to trigger the tests.
- Imported new code from the JS version as of [19 Nov 2018](https://github.com/mozilla/readability/commit/876c81f710711ba2afb36dd83889d4c5b4fc2743), which includes the following changes:
- Move phrasing contents [into paragraphs](https://github.com/mozilla/readability/commit/9f2c5cb42ee9635f091178271d66888cbb47e5dc)
- Improved the title detection
- Remove [single cell tables](https://github.com/mozilla/readability/commit/ea4165721f9105d8f1e53cfecdcfdafceaf3e4bf)
- Improved the detection of video related elements
- New test cases
- Various minor fixes
## [v1.2.0](https://github.com/andreskrey/readability.php/releases/tag/v1.2.0)
- Merged PR#49 (Missing object when calling `->getContent()`)
- Imported all changes from Readability.js as of 2 March 2018 ([8525c6a](https://github.com/mozilla/readability/commit/8525c6af36d3badbe27c4672a6f2dd99ddb4097f)):
- Check for `<base>` elements before converting URLs to absolute.
- Clean `<link>` tags on `prepArticle()`
- Attempt to return at least some text if all the algorithm runs fail (Check PR [#423](https://github.com/mozilla/readability/pull/423) on JS version)
- Add new test cases for the previous changes
- And all other changes reflected [in this diff](https://github.com/mozilla/readability/compare/c3ff1a2d2c94c1db257b2c9aa88a4b8fbeb221c5...8525c6af36d3badbe27c4672a6f2dd99ddb4097f)
## [v1.1.1](https://github.com/andreskrey/readability.php/releases/tag/v1.1.1)
- Switched from assertEquals to assertSame on unit testing to avoid weak comparisons.
- Added a safe check to avoid sending the DOMDocument as a node when scanning for node ancestors.
- Fix issue #45: Small mistake in documentation
- Fix issue #46: Added `data-src` as a image source path
- Fixed bug when extracting all the image of the article (Was extracting images from the original DOM instead of the parsed one)
- Added the `->getDOMDocument()` getter to retrieve the fully parsed DOMDocument
- Merged PR #48 that allows passing an array as configuration (@topotru)
## [v1.1.0](https://github.com/andreskrey/readability.php/releases/tag/v1.1.0)
- Added 'data-orig' as an URL source for images
- Removed 'modal' as a negative property from classes
- Added option to inject a logger
- Removed all references to the `data-readability` tags that don't apply anymore to the new structure
- Merged PR #38 (Missing DOMEntityReference)
## [v1.0.0](https://github.com/andreskrey/readability.php/releases/tag/v1.0.0)
- Node encapsulation is gone. Pre v1 all nodes were encapsulated in a Readability class, which created lots of trouble with dependencies, responsibilities, and properties. Now all the encapsulation is gone: all the DOMNodes inside the Readability class are extensions of the original DOM classes, which allows the system to take advantage of the functions and properties of DOMDocument.
- HTMLParser is gone, Readability is the new main class. Switched things a bit for this release. Pre v1 you had to create an HTMLParser class to parse the HTML. Now you have to create a Readability class, feed it the text, and check the result.
- No more dumb arrays as a result. If you want to get the title, content, images, or anything else you'll have to use the getters of the Readability class.
- Environment class is gone. Now you have to create a configuration class and use setters to set your configuration options.
- Exceptions. Make sure you wrap your Readability class in a try catch block, because if it fails to parse your HTML, it will throw a `ParseException`.
- Minimum PHP version bumped to 5.6.
## [v0.3.1](https://github.com/andreskrey/readability.php/releases/tag/v0.3.1)
- Trim titles when detecting hierarchical separators to avoid false negatives on strings with spaces.
- Fix issue when converting divs to p nodes and never rating them (issue #29)
- Fix "Unsupported operand types" (PR #31)
- Fix division by zero when no title was found (issue #32)
- New function to retrieve all images at once (PR #30)
- Get the title from the `<title>` tag before searching on the `<meta>` tags
## [v0.3.0](https://github.com/andreskrey/readability.php/releases/tag/v0.3.0)
- Merged PR #24. Fixes notice when trying to extract `og:image`
- Up to date with commit [c3ff1a2](https://github.com/mozilla/readability/commit/c3ff1a2d2c94c1db257b2c9aa88a4b8fbeb221c5) (2017-10-16), which includes the following changes:
- New tags added to the unlikelyCandidates regex
- Detection and removal of hierarchical separators in titles
- Added more tags to clean after parsing the article (`button`, `textarea`, `select`, etc.)
- New way to detect empty nodes (including an edge case where a node with a `&nbsp;` was detected as a node with content)
- Better approach to find a top candidate (especially when a top candidate is the only child of a parent node, which allows a more accurate joining of sibling elements)
- Detect text direction (`ltr` or `rtl`)
- Detect and mark data tables to avoid removing them during final clean up
- Major fixes when scanning and deleting nodes (no need to traverse backwards anymore)
- Node cleaning via regex matches
- Clean table attributes during final clean up.
- Added license
Next release after this one will be v1 and will be a major refactor around Readability and HTMLParser methods and responsibilities.
## [v0.2.2](https://github.com/andreskrey/readability.php/releases/tag/v0.2.2)
- Added a safecheck for really nasty HTML
- Added summonCthulhu option, to remove all script tags via regex
## [v0.2.1](https://github.com/andreskrey/readability.php/releases/tag/v0.2.1)
- Added `normalizeEntities` flag to convert UTF-8 characters to their HTML entity equivalents. Fixes bugs on HTML documents with mixed encoding.
- Added more information to the readme.md file
- New way to create a backup DOM: not creating a backup. In the previous version, the system cloned the $this->dom object to keep it as a backup in order to restart the algorithm with other flags, if needed. This seemed to work until I realized that *sometimes* the backup changes even if we are not touching it. Seems that the `dom` and `backupdom` objects are linked and *some* changes on the dom object reach the backupdom object. The new approach consists of deleting the backupdom object and recreating the dom object from scratch. Of course this has a performance impact, but it seems to be quite low.
## [v0.2.0](https://github.com/andreskrey/readability.php/releases/tag/v0.2.0)
100% complete port of Readability.js!
- Every test unit passes
- Readability.php produces the same exact output as Readability.js
- I'm happy :)
### Fixed
- Lots of bugs
- Merged PR by DavidFricker to avoid exceptions while grabbing the document content
### Added
- substituteEntities flag, to avoid replacing special characters with HTML entities. There's nothing we can do about `&nbsp;`, that entity is replaced by libxml and there's no way to disable it.
- Named data sets so it's easier to detect which test case is failing.
### Removed
- Couple of test cases that involved broken JS. There's nothing we can do about JS spilling onto the text.
## [0.0.3-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.3v-alpha)
We are getting closer to being a 100% complete port of Readability.js!
- Added prepArticle to remove junk after selecting the top candidates.
- Added a function to restore score after selecting top candidates. This basically works by scanning the data-readability tag and restoring the score to the contentScore variable. This is a horrible hack and should be removed once we ditch the Element interface of html-to-markdown and start extending the DOMDocument object.
- Switched all strlen functions to mb_strlen
- Fixed lots of bugs and pretty sure that introduced a bunch of new ones.
## [0.0.2-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.2-alpha)
- Last version I'm using master as the main development branch. All unreleased changes and main development will happen in the develop branch.
## [0.0.1-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.1-alpha)
- Initial release

@ -0,0 +1,30 @@
# Contributing
Contributions are **welcome** and will be fully **credited**.
We accept contributions via Pull Requests on [Github](https://github.com/andreskrey/readability.php/).
## Pull Requests
- **Document any change in behaviour** - Make sure the `README.md` and any other relevant documentation are kept up-to-date.
- **Add tests!** - Your patch won't be accepted if it doesn't have tests.
- **Create feature branches** - Don't ask us to pull from your master branch.
- **One pull request per feature** - If you want to do more than one thing, send multiple pull requests.
- **Send coherent history** - Make sure each individual commit in your pull request is meaningful. If you had to make multiple intermediate commits while developing, please [squash them](http://www.git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Changing-Multiple-Commit-Messages) before submitting.
- **Don't forget to add yourself to AUTHORS.md** - If you want to be credited, make sure you add your information (whatever you want to include) in `AUTHORS.md`.
## Running Tests
``` bash
$ make test-all #requires docker and docker-compose
```
**Happy coding**!

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -0,0 +1,30 @@
.PHONY: test-all
test-all: start test-7.3 test-7.2 test-7.1 test-7.0 stop
test-7.3:
docker-compose exec php-7.3-libxml-2.9.9 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
test-7.2:
docker-compose exec php-7.2-libxml-2.9.9 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
test-7.1:
docker-compose exec php-7.1-libxml-2.9.9 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
test-7.0:
docker-compose exec php-7.0-libxml-2.9.9 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
start:
docker-compose up -d php-7.3-libxml-2.9.9 php-7.2-libxml-2.9.9 php-7.1-libxml-2.9.9 php-7.0-libxml-2.9.9
stop:
docker-compose stop
test-all-versions:
for php_version in 7.0 7.1 7.2 7.3; do \
for libxml_version in 2.9.4 2.9.5 2.9.6 2.9.7 2.9.8 2.9.9; do \
docker-compose up -d php-$$php_version-libxml-$$libxml_version; \
docker-compose exec php-$$php_version-libxml-$$libxml_version php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml; \
done \
done
docker-compose stop

@ -0,0 +1,200 @@
# Readability.php
[![Latest Stable Version](https://poser.pugx.org/andreskrey/readability.php/v/stable)](https://packagist.org/packages/andreskrey/readability.php) [![Build Status](https://travis-ci.org/andreskrey/readability.php.svg?branch=master)](https://travis-ci.org/andreskrey/readability.php) [![Coverage Status](https://coveralls.io/repos/github/andreskrey/readability.php/badge.svg?branch=master)](https://coveralls.io/github/andreskrey/readability.php/?branch=master) [![StyleCI](https://styleci.io/repos/71042668/shield?branch=master)](https://styleci.io/repos/71042668) [![Total Downloads](https://poser.pugx.org/andreskrey/readability.php/downloads)](https://packagist.org/packages/andreskrey/readability.php) [![Monthly Downloads](https://poser.pugx.org/andreskrey/readability.php/d/monthly)](https://packagist.org/packages/andreskrey/readability.php)
PHP port of *Mozilla's* **[Readability.js](https://github.com/mozilla/readability)**. Parses html text (usually news and other articles) and returns **title**, **author**, **main image** and **text content** without nav bars, ads, footers, or anything that isn't the main body of the text. Analyzes each node, gives them a score, and determines what's relevant and what can be discarded.
![Screenshot](https://raw.githubusercontent.com/andreskrey/readability.php/assets/screenshot.png)
The project's aim is to be a 1-to-1 port of Mozilla's version and to closely follow all changes introduced there, but there are some major differences in the structure. Most of the code is a 1:1 copy (even the comments were imported), but some functions and structures were adapted to better suit the PHP language.
**Lead Developer**: Andres Rey
## Requirements
PHP 7.0+, ext-dom, ext-xml, and ext-mbstring. To install all these dependencies (in the rare case your system does not have them already), you could try something like this in *nix like environments:
`$ sudo apt-get install php7.1-xml php7.1-mbstring`
## How to use it
First you have to require the library using composer:
`composer require andreskrey/readability.php`
Then, create a Readability class and pass a Configuration class, feed the `parse()` function with your HTML and echo the variable:
```php
use andreskrey\Readability\Readability;
use andreskrey\Readability\Configuration;
use andreskrey\Readability\ParseException;
$readability = new Readability(new Configuration());
$html = file_get_contents('http://your.favorite.newspaper/article.html');
try {
$readability->parse($html);
echo $readability;
} catch (ParseException $e) {
echo sprintf('Error processing text: %s', $e->getMessage());
}
```
Your script will output the parsed text or inform about any errors. You should always wrap the `->parse` call in a try/catch block because if the HTML cannot be parsed correctly, a `ParseException` will be thrown.
If you want to have a finer control on the output, just call the properties one by one, wrapping it with your own HTML.
```php
<h1><?= $readability->getTitle(); ?></h1>
<h2>By <?= $readability->getAuthor(); ?></h2>
<div class="content"><?= $readability->getContent(); ?></div>
```
Here's a list of the available properties:
- Article title: `->getTitle();`
- Article content: `->getContent();`
- Excerpt: `->getExcerpt();`
- Main image: `->getImage();`
- All images: `->getImages();`
- Author: `->getAuthor();`
- Text direction (ltr or rtl): `->getDirection();`
If you need to tweak the final HTML you can get the DOMDocument of the result by calling `->getDOMDocument()`.
## Options
You can change the behaviour of Readability via the Configuration object. For example, if you want to fix relative URLs and declare the original URL, you could set up the configuration like this:
```php
$configuration = new Configuration();
$configuration
->setFixRelativeURLs(true)
->setOriginalURL('http://my.newspaper.url/article/something-interesting-to-read.html');
```
Also you can pass an array of configuration parameters to the constructor:
```php
$configuration = new Configuration([
'fixRelativeURLs' => true,
'originalURL' => 'http://my.newspaper.url/article/something-interesting-to-read.html',
// other parameters ... listing below
]);
```
Then you pass this Configuration object to Readability. The following options are available. Remember to prepend `set` when calling them using native setters.
- **MaxTopCandidates**: default value `5`, max amount of top level candidates.
- **CharThreshold**: default value `500`, minimum amount of characters to consider that the article was parsed successfully.
- **ArticleByLine**: default value `false`, search for the article byline and remove it from the text. It will be moved to the article metadata.
- **StripUnlikelyCandidates**: default value `true`, remove nodes that are unlikely to have relevant information. Useful for debugging or parsing complex or non-standard articles.
- **CleanConditionally**: default value `true`, remove certain nodes after parsing to return a cleaner result.
- **WeightClasses**: default value `true`, weight classes during the rating phase.
- **FixRelativeURLs**: default value `false`, convert relative URLs to absolute. Like `/test` to `http://host/test`.
- **SubstituteEntities**: default value `false`, disables the `substituteEntities` flag of libxml. Will avoid substituting HTML entities. Like `&aacute;` to á.
- **NormalizeEntities**: default value `false`, converts UTF-8 characters to their HTML entity equivalents. Useful to parse HTML with mixed encoding.
- **OriginalURL**: default value `http://fakehost`, original URL from the article used to fix relative URLs.
- **SummonCthulhu**: default value `false`, remove all `<script>` nodes via regex. This is not ideal as it might break things, but might be the only solution to [libxml problems with unescaped javascript](https://github.com/andreskrey/readability.php#known-issues). If you're not parsing Javascript tutorials, it's recommended to always set this option as `true`.
### Debug log
Logging is optional and you will have to inject your own logger to save all the debugging messages. To do so, use a logger that implements the [PSR-3 logging interface](https://github.com/php-fig/log) and pass it to the configuration object. For example:
```php
// Using monolog
$log = new Logger('Readability');
$log->pushHandler(new StreamHandler('path/to/my/log.txt'));
$configuration->setLogger($log);
```
In the log you will find information about the parsed nodes, why they were removed, and why they were considered relevant to the final article.
## Limitations
Of course the main limitation is PHP. Websites that load the content through lazy loading, AJAX, or any type of JavaScript-fueled call will be ignored (actually, *not run*) and the resulting text will be incorrect, compared to the readability.js results. All the articles you want to parse with readability.php need to be complete and all the content should be in the HTML already.
## Known Issues
### Javascript spilling into the text body
DOMDocument has some issues while parsing javascript with unescaped HTML on strings. Consider the following code:
```html
<div> <!-- Offending div without closing tag -->
<script type="text/javascript">
var test = '</div>';
// I should not appear on the result
</script>
```
If you would like to remove the scripts of the HTML (like readability does), you would expect to end up with just one div and one comment on the final HTML. The problem is that libxml takes that closing div tag inside the javascript string as an HTML tag, effectively closing the unclosed tag and leaving the rest of the javascript as a string within a P tag. If you save that node, the final HTML will end up like this:
```html
<div> <!-- Offending div without closing tag -->
<p>';
// I should not appear on the result
</p></div>
```
This is a libxml issue and not a Readability.php bug.
There's a workaround for this: using the `summonCthulhu` option. This will remove all script tags **via regex**, which is not ideal because you may end up summoning [the lord of darkness](https://stackoverflow.com/a/1732454).
### &nbsp entities disappearing
`&nbsp;` entities are converted to spaces automatically by libxml and there's no way to disable it.
### Self closing tags rendering as fully expanded tags
Self closing tags like `<br />` get automatically expanded to `<br></br>`. There is no way to disable this in libxml.
## Dependencies
Readability.php uses the [PSR Log](https://github.com/php-fig/log) interface to define the allowed type of loggers. [Monolog](https://github.com/Seldaek/monolog) is only required on development installations. (`--dev` option during `composer install`).
## To-do
- Keep up with Readability.js changes
- Add a small template engine for the __toString() method, instead of using a hardcoded one.
- Replace all the `iterator_to_array` calls with a custom PHP generator that keeps track of the removed or altered nodes.
## How it works
Readability parses all the text with DOMDocument, scans the text nodes and gives them a score, based on the amount of words, links and type of element. Then it selects the highest scoring element and creates a new DOMDocument with all its siblings. Each sibling is scored to discard useless elements, like nav bars, empty nodes, etc.
## Testing
Any version of PHP installed locally should be enough to develop new features and add new test cases. If you want to be 100% sure that your change doesn't create any issues with other versions of PHP, you can use the provided Docker containers to test against PHP 7.0, 7.1, and 7.2.
You'll need Docker and Docker Compose for this. To run all the tests in all the available versions just type the following command:
```bash
make test-all
```
This will start all the containers and run all the tests on every supported version of PHP. If you want to test against a specific version, you can use `make test-7.0`, `make test-7.1`, or `make test-7.2`.
## Code porting
Up to date with readability.js as of [19 Nov 2018](https://github.com/mozilla/readability/commit/876c81f710711ba2afb36dd83889d4c5b4fc2743).
## License
Based on Arc90's readability.js (1.7.1) script available at: http://code.google.com/p/arc90labs-readability
Copyright (c) 2010 Arc90 Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -0,0 +1,37 @@
{
"name": "andreskrey/readability.php",
"type": "library",
"description": "A PHP port of Readability.js",
"keywords": ["readability", "html"],
"homepage": "https://github.com/andreskrey/readability",
"license": "Apache-2.0",
"authors": [
{
"name": "Andres Rey",
"email": "andreskrey@gmail.com",
"role": "Lead Developer"
}
],
"autoload": {
"psr-4": {
"andreskrey\\Readability\\": "src/"
}
},
"autoload-dev": {
"psr-4": {"andreskrey\\Readability\\Test\\": "test"}
},
"require": {
"php": ">=7.0.0",
"ext-dom": "*",
"ext-xml": "*",
"ext-mbstring": "*",
"psr/log": "^1.0"
},
"require-dev": {
"phpunit/phpunit": "^6.5",
"monolog/monolog": "^1.24"
},
"suggest": {
"monolog/monolog": "Allow logging debug information"
}
}

@ -0,0 +1,196 @@
# Test matrix: one service per PHP version / libxml version combination.
version: '3'
services:
# The first service doubles as the YAML anchor (&template) that every other
# service pulls in with "<<: *template" to inherit `volumes` and `tty`.
# Each derived service re-declares the complete `build` mapping with its own
# LIBXML_VERSION / PHP_VERSION build arguments.
php-7.0-libxml-2.9.4: &template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.4
PHP_VERSION: 7.0
# Mount the project root at /app inside the container.
volumes:
- ./:/app
tty: true
php-7.0-libxml-2.9.5:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.5
PHP_VERSION: 7.0
php-7.0-libxml-2.9.6:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.6
PHP_VERSION: 7.0
php-7.0-libxml-2.9.7:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.7
PHP_VERSION: 7.0
php-7.0-libxml-2.9.8:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.8
PHP_VERSION: 7.0
php-7.0-libxml-2.9.9:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.9
PHP_VERSION: 7.0
php-7.1-libxml-2.9.4:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.4
PHP_VERSION: 7.1
php-7.1-libxml-2.9.5:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.5
PHP_VERSION: 7.1
php-7.1-libxml-2.9.6:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.6
PHP_VERSION: 7.1
php-7.1-libxml-2.9.7:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.7
PHP_VERSION: 7.1
php-7.1-libxml-2.9.8:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.8
PHP_VERSION: 7.1
php-7.1-libxml-2.9.9:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.9
PHP_VERSION: 7.1
php-7.2-libxml-2.9.4:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.4
PHP_VERSION: 7.2
php-7.2-libxml-2.9.5:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.5
PHP_VERSION: 7.2
php-7.2-libxml-2.9.6:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.6
PHP_VERSION: 7.2
php-7.2-libxml-2.9.7:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.7
PHP_VERSION: 7.2
php-7.2-libxml-2.9.8:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.8
PHP_VERSION: 7.2
php-7.2-libxml-2.9.9:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.9
PHP_VERSION: 7.2
php-7.3-libxml-2.9.4:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.4
PHP_VERSION: 7.3
php-7.3-libxml-2.9.5:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.5
PHP_VERSION: 7.3
php-7.3-libxml-2.9.6:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.6
PHP_VERSION: 7.3
php-7.3-libxml-2.9.7:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.7
PHP_VERSION: 7.3
php-7.3-libxml-2.9.8:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.8
PHP_VERSION: 7.3
php-7.3-libxml-2.9.9:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.9
PHP_VERSION: 7.3

@ -0,0 +1,8 @@
# Build arguments selecting the base image; both must be supplied by the caller
# (docker-compose `build.args` or `docker build --build-arg`).
ARG PHP_VERSION
ARG LIBXML_VERSION
FROM andreskrey/php-${PHP_VERSION}:libxml-${LIBXML_VERSION}
# NOTE(review): xdebug is presumably installed to produce the coverage report - confirm.
RUN pecl install xdebug && docker-php-ext-enable xdebug
# Required by coveralls.
# Refresh the package index in the same layer as the install so the build does
# not depend on (possibly missing or stale) lists baked into the base image,
# then drop the lists again to keep the layer small.
RUN apt-get update \
    && apt-get install -y git \
    && rm -rf /var/lib/apt/lists/*

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- PHPUnit configuration: bootstraps through Composer's autoloader and keeps
     running after failures/errors so a single run reports every broken test. -->
<phpunit bootstrap="vendor/autoload.php"
colors="true"
stopOnFailure="false"
stopOnError="false">
<!-- A single suite that picks up every test found under ./test/. -->
<testsuites>
<testsuite name="Readability.php Test Suite">
<directory>./test/</directory>
</testsuite>
</testsuites>
<!-- Code-coverage whitelist: only the .php files under src/ are measured. -->
<filter>
<whitelist>
<directory suffix=".php">src/</directory>
</whitelist>
</filter>
</phpunit>

@ -0,0 +1,92 @@
<?php
namespace andreskrey\Readability\Test;
use andreskrey\Readability\Configuration;
use Monolog\Handler\NullHandler;
use Monolog\Logger;
/**
 * Class ConfigurationTest.
 *
 * Verifies that Configuration picks up its options from the constructor array,
 * that unknown keys do not leak into the configuration, and that a logger can
 * be injected and read back.
 */
class ConfigurationTest extends \PHPUnit\Framework\TestCase
{
    /**
     * Every known option passed to the constructor must be readable through its getter.
     *
     * @dataProvider getParams
     *
     * @param array $params
     */
    public function testConfigurationConstructorSetsParameters(array $params)
    {
        $config = new Configuration($params);

        $this->doEqualsAsserts($config, $params);
    }

    /**
     * A key that is not a configuration option must not show up in the exported array.
     *
     * @dataProvider getParams
     *
     * @param array $params
     */
    public function testInvalidParameterIsNotInConfig(array $params)
    {
        $config = new Configuration($params);

        $this->assertArrayNotHasKey('invalidParameter', $config->toArray(), 'Invalid param key is not present in config');
    }

    /**
     * Compares each option in $options with the value returned by the matching getter.
     *
     * @param Configuration $config
     * @param array $options
     */
    private function doEqualsAsserts(Configuration $config, array $options)
    {
        $this->assertEquals($options['maxTopCandidates'], $config->getMaxTopCandidates());
        $this->assertEquals($options['charThreshold'], $config->getCharThreshold());
        $this->assertEquals($options['articleByLine'], $config->getArticleByLine());
        $this->assertEquals($options['stripUnlikelyCandidates'], $config->getStripUnlikelyCandidates());
        $this->assertEquals($options['cleanConditionally'], $config->getCleanConditionally());
        $this->assertEquals($options['weightClasses'], $config->getWeightClasses());
        $this->assertEquals($options['fixRelativeURLs'], $config->getFixRelativeURLs());
        $this->assertEquals($options['substituteEntities'], $config->getSubstituteEntities());
        $this->assertEquals($options['normalizeEntities'], $config->getNormalizeEntities());
        $this->assertEquals($options['originalURL'], $config->getOriginalURL());
        // Previously compared against getOriginalURL(), which never exercised this option.
        $this->assertEquals($options['summonCthulhu'], $config->getSummonCthulhu());
    }

    /**
     * Data provider: one named data set whose single argument is the options array.
     *
     * @return array
     */
    public function getParams()
    {
        return [
            'All current parameters' => [
                [
                    'maxTopCandidates' => 3,
                    'wordThreshold' => 500,
                    'charThreshold' => 500,
                    'articleByLine' => true,
                    'stripUnlikelyCandidates' => false,
                    'cleanConditionally' => false,
                    'weightClasses' => false,
                    'fixRelativeURLs' => true,
                    'substituteEntities' => true,
                    'normalizeEntities' => true,
                    'originalURL' => 'my.original.url',
                    // Boolean flag; must differ from originalURL so a getter mix-up is detected.
                    'summonCthulhu' => true,
                    'invalidParameter' => 'invalidParameterValue'
                ]
            ]
        ];
    }

    /**
     * Test if a logger interface can be injected and retrieved from the Configuration object.
     */
    public function testLoggerCanBeInjected()
    {
        $configuration = new Configuration();
        $log = new Logger('Readability');
        $log->pushHandler(new NullHandler());

        $configuration->setLogger($log);

        $this->assertSame($log, $configuration->getLogger());
    }
}

@ -0,0 +1,148 @@
<?php
namespace andreskrey\Readability\Test;
use andreskrey\Readability\Configuration;
use andreskrey\Readability\ParseException;
use andreskrey\Readability\Readability;
/**
 * Class ReadabilityTest.
 *
 * Runs Readability against every fixture directory under test-pages and
 * compares content, metadata and images with the stored expectations.
 */
class ReadabilityTest extends \PHPUnit\Framework\TestCase
{
    /**
     * Test that Readability parses the HTML correctly and matches the expected result.
     *
     * @dataProvider getSamplePages
     *
     * @param TestPage $testPage
     *
     * @throws ParseException
     */
    public function testReadabilityParsesHTML(TestPage $testPage)
    {
        $options = ['OriginalURL' => 'http://fakehost/test/test.html',
            'FixRelativeURLs' => true,
            'SubstituteEntities' => true,
            'ArticleByLine' => true
        ];

        // Fixture-specific settings come first so the fixed options above win on key clashes.
        $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));

        $readability = new Readability($configuration);
        $readability->parse($testPage->getSourceHTML());

        $this->assertSame($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.');
    }

    /**
     * Test that Readability extracts the expected metadata (author, direction,
     * excerpt, image and title) from the test page.
     *
     * @dataProvider getSamplePages
     *
     * @param TestPage $testPage
     *
     * @throws ParseException
     */
    public function testReadabilityParsesMetadata(TestPage $testPage)
    {
        $options = ['OriginalURL' => 'http://fakehost/test/test.html',
            'FixRelativeURLs' => true,
            'SubstituteEntities' => true,
            'ArticleByLine' => true
        ];

        $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));

        $readability = new Readability($configuration);
        $readability->parse($testPage->getSourceHTML());

        $this->assertSame($testPage->getExpectedMetadata()->Author, $readability->getAuthor(), 'Parsed Author does not match expected value.');
        $this->assertSame($testPage->getExpectedMetadata()->Direction, $readability->getDirection(), 'Parsed Direction does not match expected value.');
        $this->assertSame($testPage->getExpectedMetadata()->Excerpt, $readability->getExcerpt(), 'Parsed Excerpt does not match expected value.');
        $this->assertSame($testPage->getExpectedMetadata()->Image, $readability->getImage(), 'Parsed Image does not match expected value.');
        $this->assertSame($testPage->getExpectedMetadata()->Title, $readability->getTitle(), 'Parsed Title does not match expected value.');
    }

    /**
     * Test that Readability returns all the expected images from the test page.
     *
     * @param TestPage $testPage
     * @dataProvider getSamplePages
     *
     * @throws ParseException
     */
    public function testHTMLParserParsesImages(TestPage $testPage)
    {
        $options = ['OriginalURL' => 'http://fakehost/test/test.html',
            'fixRelativeURLs' => true,
            'substituteEntities' => true,
        ];

        $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));

        $readability = new Readability($configuration);
        $readability->parse($testPage->getSourceHTML());

        $this->assertSame($testPage->getExpectedImages(), $readability->getImages());
    }

    /**
     * Main data provider.
     *
     * Yields one TestPage per sub-directory of test-pages, keyed by the
     * directory name so failures are reported with the fixture's name.
     *
     * @return \Generator
     */
    public function getSamplePages()
    {
        $path = __DIR__ . DIRECTORY_SEPARATOR . 'test-pages';

        foreach (scandir($path) as $testPage) {
            $testCasePath = $path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR;

            // Skip the dot entries by name instead of assuming they are the first two
            // results (entries sorting before '.' would break that), and ignore stray files.
            if ($testPage === '.' || $testPage === '..' || !is_dir($testCasePath)) {
                continue;
            }

            $source = file_get_contents($testCasePath . 'source.html');
            $expectedHTML = file_get_contents($testCasePath . 'expected.html');
            $expectedImages = json_decode(file_get_contents($testCasePath . 'expected-images.json'), true);
            // Decoded as an object: the metadata test reads it through ->Author, ->Title, etc.
            $expectedMetadata = json_decode(file_get_contents($testCasePath . 'expected-metadata.json'));
            // config.json is optional; fixtures without it use no extra options.
            $configuration = file_exists($testCasePath . 'config.json') ? json_decode(file_get_contents($testCasePath . 'config.json'), true) : [];

            yield $testPage => [new TestPage($configuration, $source, $expectedHTML, $expectedImages, $expectedMetadata)];
        }
    }

    /**
     * Test that Readability throws an exception with malformed HTML.
     *
     * @throws ParseException
     */
    public function testReadabilityThrowsExceptionWithMalformedHTML()
    {
        $parser = new Readability(new Configuration());
        $this->expectException(ParseException::class);
        $this->expectExceptionMessage('Invalid or incomplete HTML.');
        $parser->parse('<html>');
    }

    /**
     * Test that Readability throws an exception with incomplete or short HTML.
     *
     * @throws ParseException
     */
    public function testReadabilityThrowsExceptionWithUnparseableHTML()
    {
        $parser = new Readability(new Configuration());
        $this->expectException(ParseException::class);
        $this->expectExceptionMessage('Could not parse text.');
        $parser->parse('<html><body><p></p></body></html>');
    }

    /**
     * Test that the Readability object has no content as soon as it is instantiated.
     */
    public function testReadabilityCallGetContentWithNoContent()
    {
        $parser = new Readability(new Configuration());
        $this->assertNull($parser->getContent());
    }
}

@ -0,0 +1,61 @@
<?php
namespace andreskrey\Readability\Test;
/**
 * Read-only holder for one test fixture: the page to parse, the options to
 * parse it with, and the results the parser is expected to produce.
 */
class TestPage
{
    /** @var mixed Extra Readability options for this fixture. */
    private $options;

    /** @var mixed Raw HTML handed to the parser. */
    private $input;

    /** @var mixed Article HTML the parser should produce. */
    private $wantedHTML;

    /** @var mixed Images the parser should report. */
    private $wantedImages;

    /** @var mixed Metadata the parser should extract. */
    private $wantedMetadata;

    /**
     * Stores the given values unchanged; no validation or copying is done.
     *
     * @param mixed $configuration
     * @param mixed $sourceHTML
     * @param mixed $expectedHTML
     * @param mixed $expectedImages
     * @param mixed $expectedMetadata
     */
    public function __construct($configuration, $sourceHTML, $expectedHTML, $expectedImages, $expectedMetadata)
    {
        $this->input = $sourceHTML;
        $this->options = $configuration;
        $this->wantedHTML = $expectedHTML;
        $this->wantedImages = $expectedImages;
        $this->wantedMetadata = $expectedMetadata;
    }

    /**
     * @return mixed the configuration value given to the constructor
     */
    public function getConfiguration()
    {
        return $this->options;
    }

    /**
     * @return mixed the source HTML given to the constructor
     */
    public function getSourceHTML()
    {
        return $this->input;
    }

    /**
     * @return mixed the expected HTML given to the constructor
     */
    public function getExpectedHTML()
    {
        return $this->wantedHTML;
    }

    /**
     * @return mixed the expected images given to the constructor
     */
    public function getExpectedImages()
    {
        return $this->wantedImages;
    }

    /**
     * @return mixed the expected metadata given to the constructor
     */
    public function getExpectedMetadata()
    {
        return $this->wantedMetadata;
    }
}

@ -0,0 +1,3 @@
[
"http:\/\/fakehost\/static\/code\/2013\/blanket-coverage.png"
]

@ -0,0 +1,8 @@
{
"Author": "Nicolas Perriault —",
"Direction": null,
"Excerpt": "Nicolas Perriault's homepage.",
"Image": null,
"Title": "Get your Frontend JavaScript Code Covered | Code",
"SiteName": null
}

@ -0,0 +1,128 @@
<section><p><strong>So finally you're <a href="http://fakehost/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">testing your frontend JavaScript code</a>? Great! The more you
write tests, the more confident you are with your code… but how much precisely?
That's where <a href="http://en.wikipedia.org/wiki/Code_coverage">code coverage</a> might
help.</strong>
</p>
<p>The idea behind code coverage is to record which parts of your code (functions,
statements, conditionals and so on) have been executed by your test suite,
to compute metrics out of these data and usually to provide tools for navigating
and inspecting them.</p>
<p>Not a lot of frontend developers I know actually test their frontend code,
and I can barely imagine how many of them have ever setup code coverage…
Mostly because there are not many frontend-oriented tools in this area
I guess.</p>
<p>Actually I've only found one which provides an adapter for <a href="http://visionmedia.github.io/mocha/">Mocha</a> and
actually works…</p>
<blockquote>
<p>Drinking game for web devs:
<br></br>(1) Think of a noun
<br></br>(2) Google "&lt;noun&gt;.js"
<br></br>(3) If a library with that name exists - drink</p>— Shay Friedman (@ironshay)
<a href="https://twitter.com/ironshay/statuses/370525864523743232">August 22, 2013</a>
</blockquote>
<p><strong><a href="http://blanketjs.org/">Blanket.js</a></strong> is an <em>easy to install, easy to configure,
and easy to use JavaScript code coverage library that works both in-browser and
with nodejs.</em>
</p>
<p>Its use is dead easy, adding Blanket support to your Mocha test suite
is just matter of adding this simple line to your HTML test file:</p>
<pre><code>&lt;script src="vendor/blanket.js"
data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
</code></pre>
<p>Source files: <a href="https://raw.github.com/alex-seville/blanket/master/dist/qunit/blanket.min.js">blanket.js</a>,
<a href="https://raw.github.com/alex-seville/blanket/master/src/adapters/mocha-blanket.js">mocha-blanket.js</a>
</p>
<p>As an example, let's reuse the silly <code>Cow</code> example we used
<a href="http://fakehost/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">in a previous episode</a>:</p>
<pre><code>// cow.js
(function(exports) {
"use strict";
function Cow(name) {
this.name = name || "Anon cow";
}
exports.Cow = Cow;
Cow.prototype = {
greets: function(target) {
if (!target)
throw new Error("missing target");
return this.name + " greets " + target;
}
};
})(this);
</code></pre>
<p>And its test suite, powered by Mocha and <a href="http://chaijs.com/">Chai</a>:</p>
<pre><code>var expect = chai.expect;
describe("Cow", function() {
describe("constructor", function() {
it("should have a default name", function() {
var cow = new Cow();
expect(cow.name).to.equal("Anon cow");
});
it("should set cow's name if provided", function() {
var cow = new Cow("Kate");
expect(cow.name).to.equal("Kate");
});
});
describe("#greets", function() {
it("should greet passed target", function() {
var greetings = (new Cow("Kate")).greets("Baby");
expect(greetings).to.equal("Kate greets Baby");
});
});
});
</code></pre>
<p>Let's create the HTML test file for it, featuring Blanket and its adapter
for Mocha:</p>
<pre><code>&lt;!DOCTYPE html&gt;
&lt;html&gt;
&lt;head&gt;
&lt;meta charset="utf-8"&gt;
&lt;title&gt;Test&lt;/title&gt;
&lt;link rel="stylesheet" media="all" href="vendor/mocha.css"&gt;
&lt;/head&gt;
&lt;body&gt;
&lt;div id="mocha"&gt;&lt;/div&gt;
&lt;div id="messages"&gt;&lt;/div&gt;
&lt;div id="fixtures"&gt;&lt;/div&gt;
&lt;script src="vendor/mocha.js"&gt;&lt;/script&gt;
&lt;script src="vendor/chai.js"&gt;&lt;/script&gt;
&lt;script src="vendor/blanket.js"
data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
&lt;script&gt;mocha.setup('bdd');&lt;/script&gt;
&lt;script src="cow.js" data-cover&gt;&lt;/script&gt;
&lt;script src="cow_test.js"&gt;&lt;/script&gt;
&lt;script&gt;mocha.run();&lt;/script&gt;
&lt;/body&gt;
&lt;/html&gt;
</code></pre>
<p><strong>Notes</strong>:</p>
<ul><li>Notice the <code>data-cover</code> attribute we added to the script tag
loading the source of our library;</li>
<li>The HTML test file <em>must</em> be served over HTTP for the adapter to
be loaded.</li>
</ul><p>Running the tests now gives us something like this:</p>
<p>
<img alt="screenshot" src="http://fakehost/static/code/2013/blanket-coverage.png"></img></p>
<p>As you can see, the report at the bottom highlights that we haven't actually
tested the case where an error is raised in case a target name is missing.
We've been informed of that, nothing more, nothing less. We simply know
we're missing a test here. Isn't this cool? I think so!</p>
<p>Just remember that code coverage will only <a href="http://codebetter.com/karlseguin/2008/12/09/code-coverage-use-it-wisely/">bring you numbers</a> and
raw information, not actual proofs that the whole of your <em>code logic</em> has
been actually covered. If you ask me, the best inputs you can get about
your code logic and implementation ever are the ones issued out of <a href="http://www.extremeprogramming.org/rules/pair.html">pair programming</a>
sessions
and <a href="http://alexgaynor.net/2013/sep/26/effective-code-review/">code reviews</a>
but that's another story.</p>
<p><strong>So is code coverage silver bullet? No. Is it useful? Definitely. Happy testing!</strong>
</p>
</section>

@ -0,0 +1,233 @@
<!DOCTYPE html>
<html class="no-js" lang="en">
<head>
<meta charset="utf-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/>
<title>Get your Frontend JavaScript Code Covered | Code | Nicolas Perriault</title>
<meta
name="description" content="Nicolas Perriault's homepage."/>
<meta name="viewport" content="width=device-width"/>
<link href="//fonts.googleapis.com/css?family=Asap:400,400italic,700,700italic&amp;subset=latin,latin-ext"
rel="stylesheet" type="text/css"/>
<link rel="stylesheet" type="text/css" href="/static/packed.css?1412806084"/>
<link rel="alternate" type="application/rss+xml" href="/code/feed/" title="Code (RSS)"/>
<link rel="alternate" type="application/rss+xml" href="/photography/feed/"
title="Photography (RSS)"/>
<link rel="alternate" type="application/rss+xml" href="/talks/feed/" title="Talks (RSS)"/>
<link rel="alternate" type="application/rss+xml" href="/carnet/feed/"
title="Carnet (RSS)"/>
<link rel="alternate" type="application/rss+xml" href="/feed/" title="Everything (RSS)"/>
<!--[if lt IE 9]>
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
</head>
<body class="code " onload="prettyPrint()">
<!--[if lt IE 7]>
<p class="chromeframe">Your browser is <em>ancient!</em> Please <a href="http://www.quirksmode.org/upgrade.html">upgrade</a>.</p>
<![endif]-->
<div class="container">
<header class="main-title">
<h1><a href="/">Hi, I'm <strong>Nicolas.</strong></a></h1>
<small>I code stuff. I take photos. I write rants.</small>
</header>
<main class="contents" role="main">
<article lang="en" class="code" itemscope="" itemtype="http://schema.org/BlogPosting">
<link itemprop="url" href="/code/2013/get-your-frontend-javascript-code-covered/"/>
<header>
<h2><a itemprop="name" href="/code/2013/get-your-frontend-javascript-code-covered/">Get your Frontend JavaScript Code Covered</a></h2>
</header>
<section>
<p><strong>So finally you're <a href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">testing your frontend JavaScript code</a>? Great! The more you
write tests, the more confident you are with your code… but how much precisely?
That's where <a href="http://en.wikipedia.org/wiki/Code_coverage">code coverage</a> might
help.</strong>
</p>
<p>The idea behind code coverage is to record which parts of your code (functions,
statements, conditionals and so on) have been executed by your test suite,
to compute metrics out of these data and usually to provide tools for navigating
and inspecting them.</p>
<p>Not a lot of frontend developers I know actually test their frontend code,
and I can barely imagine how many of them have ever setup code coverage…
Mostly because there are not many frontend-oriented tools in this area
I guess.</p>
<p>Actually I've only found one which provides an adapter for <a href="http://visionmedia.github.io/mocha/">Mocha</a> and
actually works…</p>
<blockquote class="twitter-tweet tw-align-center">
<p>Drinking game for web devs:
<br />(1) Think of a noun
<br />(2) Google "&lt;noun&gt;.js"
<br />(3) If a library with that name exists - drink</p>— Shay Friedman (@ironshay)
<a
href="https://twitter.com/ironshay/statuses/370525864523743232">August 22, 2013</a>
</blockquote>
<p><strong><a href="http://blanketjs.org/">Blanket.js</a></strong> is an <em>easy to install, easy to configure,
and easy to use JavaScript code coverage library that works both in-browser and
with nodejs.</em>
</p>
<p>Its use is dead easy, adding Blanket support to your Mocha test suite
is just matter of adding this simple line to your HTML test file:</p>
<pre><code>&lt;script src="vendor/blanket.js"
data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
</code></pre>
<p>Source files: <a href="https://raw.github.com/alex-seville/blanket/master/dist/qunit/blanket.min.js">blanket.js</a>,
<a
href="https://raw.github.com/alex-seville/blanket/master/src/adapters/mocha-blanket.js">mocha-blanket.js</a>
</p>
<p>As an example, let's reuse the silly <code>Cow</code> example we used
<a
href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">in a previous episode</a>:</p>
<pre><code>// cow.js
(function(exports) {
"use strict";
function Cow(name) {
this.name = name || "Anon cow";
}
exports.Cow = Cow;
Cow.prototype = {
greets: function(target) {
if (!target)
throw new Error("missing target");
return this.name + " greets " + target;
}
};
})(this);
</code></pre>
<p>And its test suite, powered by Mocha and <a href="http://chaijs.com/">Chai</a>:</p>
<pre><code>var expect = chai.expect;
describe("Cow", function() {
describe("constructor", function() {
it("should have a default name", function() {
var cow = new Cow();
expect(cow.name).to.equal("Anon cow");
});
it("should set cow's name if provided", function() {
var cow = new Cow("Kate");
expect(cow.name).to.equal("Kate");
});
});
describe("#greets", function() {
it("should greet passed target", function() {
var greetings = (new Cow("Kate")).greets("Baby");
expect(greetings).to.equal("Kate greets Baby");
});
});
});
</code></pre>
<p>Let's create the HTML test file for it, featuring Blanket and its adapter
for Mocha:</p>
<pre><code>&lt;!DOCTYPE html&gt;
&lt;html&gt;
&lt;head&gt;
&lt;meta charset="utf-8"&gt;
&lt;title&gt;Test&lt;/title&gt;
&lt;link rel="stylesheet" media="all" href="vendor/mocha.css"&gt;
&lt;/head&gt;
&lt;body&gt;
&lt;div id="mocha"&gt;&lt;/div&gt;
&lt;div id="messages"&gt;&lt;/div&gt;
&lt;div id="fixtures"&gt;&lt;/div&gt;
&lt;script src="vendor/mocha.js"&gt;&lt;/script&gt;
&lt;script src="vendor/chai.js"&gt;&lt;/script&gt;
&lt;script src="vendor/blanket.js"
data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
&lt;script&gt;mocha.setup('bdd');&lt;/script&gt;
&lt;script src="cow.js" data-cover&gt;&lt;/script&gt;
&lt;script src="cow_test.js"&gt;&lt;/script&gt;
&lt;script&gt;mocha.run();&lt;/script&gt;
&lt;/body&gt;
&lt;/html&gt;
</code></pre>
<p><strong>Notes</strong>:</p>
<ul>
<li>Notice the <code>data-cover</code> attribute we added to the script tag
loading the source of our library;</li>
<li>The HTML test file <em>must</em> be served over HTTP for the adapter to
be loaded.</li>
</ul>
<p>Running the tests now gives us something like this:</p>
<p>
<img alt="screenshot" src="/static/code/2013/blanket-coverage.png"/>
</p>
<p>As you can see, the report at the bottom highlights that we haven't actually
tested the case where an error is raised in case a target name is missing.
We've been informed of that, nothing more, nothing less. We simply know
we're missing a test here. Isn't this cool? I think so!</p>
<p>Just remember that code coverage will only <a href="http://codebetter.com/karlseguin/2008/12/09/code-coverage-use-it-wisely/">bring you numbers</a> and
raw information, not actual proofs that the whole of your <em>code logic</em> has
been actually covered. If you ask me, the best inputs you can get about
your code logic and implementation ever are the ones issued out of <a href="http://www.extremeprogramming.org/rules/pair.html">pair programming</a>
sessions
and <a href="http://alexgaynor.net/2013/sep/26/effective-code-review/">code reviews</a>
but that's another story.</p>
<p><strong>So is code coverage silver bullet? No. Is it useful? Definitely. Happy testing!</strong>
</p>
</section>
<aside>
<p> <span class="article-author" itemprop="author" itemscope="" itemtype="http://schema.org/Person">
<span itemprop="name">Nicolas Perriault</span></span>
<time
datetime="2013-09-29" itemprop="datePublished">2013-09-29</time>— in <a href="/code/" itemprop="genre">Code</a>
<a href="/code/2013/get-your-frontend-javascript-code-covered/">Permalink</a>
<a
rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/">License</a><a href="http://flattr.com/submit/auto?url=https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/&amp;title=Get your Frontend JavaScript Code Covered&amp;user_id=n1k0&amp;category=software&amp;language=en">flattr this</a>
</p>
</aside>
<hr/>
<nav> <a class="prev" href="/code/2013/functional-javascript-for-crawling-the-web/">Functional JavaScript for crawling the Web</a>
|
<a
class="next" href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">Testing your frontend JavaScript code using mocha, chai, and sinon</a>
</nav>
</article>
</main>
<nav class="sidebar">
<ul>
<li class="home"><a href="/" hreflang="en">Home</a>
</li>
<li class="code"><a href="/code/" hreflang="en">Code</a>
</li>
<li class="photography"><a href="/photography/" hreflang="en">Photography</a>
</li>
<li class="talks"><a href="/talks/" hreflang="en">Talks</a>
</li>
<li class="carnet"><a href="/carnet/" hreflang="fr">Carnet <span>fr</span></a>
</li>
<li class="contact"><a href="/contact/" hreflang="en">Contact</a>
</li>
</ul>
</nav>
<footer class="site-footer">
<p>© 2012 Nicolas Perriault — <a href="https://twitter.com/n1k0">Tweet at me</a>
<a
href="https://github.com/n1k0">Get my code</a><a href="http://500px.com/n1k0">Enjoy my pics</a>
<a href="/contact/">Contact me</a>
</p>
</footer>
</div>
<!-- /container -->
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>
<script>
window.jQuery || document.write('&lt;script src="js/libs/jquery-1.7.1.min.js">&lt;\/script>')
</script>
<script type="text/javascript" src="/static/js/libs/prettify/prettify.js"></script>
<script type="text/javascript" src="/static/js/app.js"></script>
<script src="//platform.twitter.com/widgets.js" charset="utf-8"></script>
</body>
</html>

@ -0,0 +1,8 @@
{
"Author": "Nikhil Marathe",
"Direction": null,
"Excerpt": "For more than a decade the Web has used XMLHttpRequest (XHR) to achieve asynchronous requests in JavaScript. While very useful, XHR is not a very ...",
"Image": null,
"Title": "This API is so Fetching!",
"SiteName": "Mozilla Hacks the Web developer blog"
}

@ -0,0 +1,415 @@
<div id="content-main"><article role="article"><p>For more than a decade the Web has used XMLHttpRequest (XHR) to achieve
asynchronous requests in JavaScript. While very useful, XHR is not a very
nice API. It suffers from lack of separation of concerns. The input, output
and state are all managed by interacting with one object, and state is
tracked using events. Also, the event-based model doesn’t play well with
JavaScript’s recent focus on Promise- and generator-based asynchronous
programming.</p>
<p>The <a href="https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API">Fetch API</a> intends
to fix most of these problems. It does this by introducing the same primitives
to JS that are used in the HTTP protocol. In addition, it introduces a
utility function <code>fetch()</code> that succinctly captures the intention
of retrieving a resource from the network.</p>
<p>The <a href="https://fetch.spec.whatwg.org">Fetch specification</a>, which
defines the API, nails down the semantics of a user agent fetching a resource.
This, combined with ServiceWorkers, is an attempt to:</p>
<ol><li>Improve the offline experience.</li>
<li>Expose the building blocks of the Web to the platform as part of the
<a href="https://extensiblewebmanifesto.org/">extensible web movement</a>.</li>
</ol><p>As of this writing, the Fetch API is available in Firefox 39 (currently
Nightly) and Chrome 42 (currently dev). Github has a <a href="https://github.com/github/fetch">Fetch polyfill</a>.</p>
<h2>Feature detection</h2>
<p>Fetch API support can be detected by checking for <code>Headers</code>, <code>Request</code>, <code>Response</code> or <code>fetch</code> on
the <code>window</code> or <code>worker</code> scope.</p>
<h2>Simple fetching</h2>
<p>The most useful, high-level part of the Fetch API is the <code>fetch()</code> function.
In its simplest form it takes a URL and returns a promise that resolves
to the response. The response is captured as a <code>Response</code> object.</p>
<div>
<div><pre>fetch<span>(</span><span>"/data.json"</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
<span>// res instanceof Response == true.</span>
<span>if</span> <span>(</span>res.<span>ok</span><span>)</span> <span>{</span>
res.<span>json</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>data<span>)</span> <span>{</span>
console.<span>log</span><span>(</span>data.<span>entries</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span>
<span>}</span> <span>else</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Looks like the response wasn't perfect, got status"</span><span>,</span> res.<span>status</span><span>)</span><span>;</span>
<span>}</span>
<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Fetch failed!"</span><span>,</span> e<span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<p>Submitting some parameters, it would look like this:</p>
<div>
<div><pre>fetch<span>(</span><span>"http://www.example.org/submit.php"</span><span>,</span> <span>{</span>
method<span>:</span> <span>"POST"</span><span>,</span>
headers<span>:</span> <span>{</span>
<span>"Content-Type"</span><span>:</span> <span>"application/x-www-form-urlencoded"</span>
<span>}</span><span>,</span>
body<span>:</span> <span>"firstName=Nikhil&amp;favColor=blue&amp;password=easytoguess"</span>
<span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
<span>if</span> <span>(</span>res.<span>ok</span><span>)</span> <span>{</span>
alert<span>(</span><span>"Perfect! Your settings are saved."</span><span>)</span><span>;</span>
<span>}</span> <span>else</span> <span>if</span> <span>(</span>res.<span>status</span> <span>==</span> <span>401</span><span>)</span> <span>{</span>
alert<span>(</span><span>"Oops! You are not authorized."</span><span>)</span><span>;</span>
<span>}</span>
<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
alert<span>(</span><span>"Error submitting form!"</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<p>The <code>fetch()</code> function’s arguments are the same as those passed
to the
<br></br><code>Request()</code> constructor, so you may directly pass arbitrarily
complex requests to <code>fetch()</code> as discussed below.</p>
<h2>Headers</h2>
<p>Fetch introduces 3 interfaces. These are <code>Headers</code>, <code>Request</code> and
<br></br><code>Response</code>. They map directly to the underlying HTTP concepts,
but have
<br></br>certain visibility filters in place for privacy and security reasons,
such as
<br></br>supporting CORS rules and ensuring cookies aren’t readable by third parties.</p>
<p>The <a href="https://fetch.spec.whatwg.org/#headers-class">Headers interface</a> is
a simple multi-map of names to values:</p>
<div>
<div><pre><span>var</span> content <span>=</span> <span>"Hello World"</span><span>;</span>
<span>var</span> reqHeaders <span>=</span> <span>new</span> Headers<span>(</span><span>)</span><span>;</span>
reqHeaders.<span>append</span><span>(</span><span>"Content-Type"</span><span>,</span> <span>"text/plain"</span><span>)</span><span>;</span>
reqHeaders.<span>append</span><span>(</span><span>"Content-Length"</span><span>,</span> content.<span>length</span>.<span>toString</span><span>(</span><span>)</span><span>)</span><span>;</span>
reqHeaders.<span>append</span><span>(</span><span>"X-Custom-Header"</span><span>,</span> <span>"ProcessThisImmediately"</span><span>)</span><span>;</span></pre>
</div></div>
<p>The same can be achieved by passing an array of arrays or a JS object
literal
<br></br>to the constructor:</p>
<div>
<div><pre>reqHeaders <span>=</span> <span>new</span> Headers<span>(</span><span>{</span>
<span>"Content-Type"</span><span>:</span> <span>"text/plain"</span><span>,</span>
<span>"Content-Length"</span><span>:</span> content.<span>length</span>.<span>toString</span><span>(</span><span>)</span><span>,</span>
<span>"X-Custom-Header"</span><span>:</span> <span>"ProcessThisImmediately"</span><span>,</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<p>The contents can be queried and retrieved:</p>
<div>
<div><pre>console.<span>log</span><span>(</span>reqHeaders.<span>has</span><span>(</span><span>"Content-Type"</span><span>)</span><span>)</span><span>;</span> <span>// true</span>
console.<span>log</span><span>(</span>reqHeaders.<span>has</span><span>(</span><span>"Set-Cookie"</span><span>)</span><span>)</span><span>;</span> <span>// false</span>
reqHeaders.<span>set</span><span>(</span><span>"Content-Type"</span><span>,</span> <span>"text/html"</span><span>)</span><span>;</span>
reqHeaders.<span>append</span><span>(</span><span>"X-Custom-Header"</span><span>,</span> <span>"AnotherValue"</span><span>)</span><span>;</span>
 
console.<span>log</span><span>(</span>reqHeaders.<span>get</span><span>(</span><span>"Content-Length"</span><span>)</span><span>)</span><span>;</span> <span>// 11</span>
console.<span>log</span><span>(</span>reqHeaders.<span>getAll</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>)</span><span>;</span> <span>// ["ProcessThisImmediately", "AnotherValue"]</span>
 
reqHeaders.<span>delete</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>reqHeaders.<span>getAll</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>)</span><span>;</span> <span>// []</span></pre>
</div></div>
<p>Some of these operations are only useful in ServiceWorkers, but they provide
<br></br>a much nicer API to Headers.</p>
<p>Since Headers can be sent in requests, or received in responses, and have
various limitations about what information can and should be mutable, <code>Headers</code> objects
have a <strong>guard</strong> property. This is not exposed to the Web, but
it affects which mutation operations are allowed on the Headers object.
<br></br>Possible values are:</p>
<ul><li>“none”: default.</li>
<li>“request”: guard for a Headers object obtained from a Request (<code>Request.headers</code>).</li>
<li>“request-no-cors”: guard for a Headers object obtained from a Request
created
<br></br>with mode “no-cors”.</li>
<li>“response”: naturally, for Headers obtained from Response (<code>Response.headers</code>).</li>
<li>“immutable”: Mostly used for ServiceWorkers, renders a Headers object
<br></br>read-only.</li>
</ul><p>The details of how each guard affects the behaviors of the Headers object
are
<br></br>in the <a href="https://fetch.spec.whatwg.org">specification</a>. For example,
you may not append or set a “request” guarded Headers “Content-Length”
header. Similarly, inserting “Set-Cookie” into a Response header is not
allowed so that ServiceWorkers may not set cookies via synthesized Responses.</p>
<p>All of the Headers methods throw TypeError if <code>name</code> is not a
<a href="https://fetch.spec.whatwg.org/#concept-header-name">valid HTTP Header name</a>. The mutation operations will throw TypeError
if there is an immutable guard. Otherwise they fail silently. For example:</p>
<div>
<div><pre><span>var</span> res <span>=</span> Response.<span>error</span><span>(</span><span>)</span><span>;</span>
<span>try</span> <span>{</span>
res.<span>headers</span>.<span>set</span><span>(</span><span>"Origin"</span><span>,</span> <span>"http://mybank.com"</span><span>)</span><span>;</span>
<span>}</span> <span>catch</span><span>(</span>e<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Cannot pretend to be a bank!"</span><span>)</span><span>;</span>
<span>}</span></pre>
</div></div>
<h2>Request</h2>
<p>The Request interface defines a request to fetch a resource over HTTP.
URL, method and headers are expected, but the Request also allows specifying
a body, a request mode, credentials and cache hints.</p>
<p>The simplest Request is of course, just a URL, as you may do to GET a
resource.</p>
<div>
<div><pre><span>var</span> req <span>=</span> <span>new</span> Request<span>(</span><span>"/index.html"</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>req.<span>method</span><span>)</span><span>;</span> <span>// "GET"</span>
console.<span>log</span><span>(</span>req.<span>url</span><span>)</span><span>;</span> <span>// "http://example.com/index.html"</span></pre>
</div></div>
<p>You may also pass a Request to the <code>Request()</code> constructor to
create a copy.
<br></br>(This is not the same as calling the <code>clone()</code> method, which
is covered in
<br></br>the “Reading bodies” section.).</p>
<div>
<div><pre><span>var</span> copy <span>=</span> <span>new</span> Request<span>(</span>req<span>)</span><span>;</span>
console.<span>log</span><span>(</span>copy.<span>method</span><span>)</span><span>;</span> <span>// "GET"</span>
console.<span>log</span><span>(</span>copy.<span>url</span><span>)</span><span>;</span> <span>// "http://example.com/index.html"</span></pre>
</div></div>
<p>Again, this form is probably only useful in ServiceWorkers.</p>
<p>The non-URL attributes of the <code>Request</code> can only be set by passing
initial
<br></br>values as a second argument to the constructor. This argument is a dictionary.</p>
<div>
<div><pre><span>var</span> uploadReq <span>=</span> <span>new</span> Request<span>(</span><span>"/uploadImage"</span><span>,</span> <span>{</span>
method<span>:</span> <span>"POST"</span><span>,</span>
headers<span>:</span> <span>{</span>
<span>"Content-Type"</span><span>:</span> <span>"image/png"</span><span>,</span>
<span>}</span><span>,</span>
body<span>:</span> <span>"image data"</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<p>The Request’s mode is used to determine if cross-origin requests lead
to valid responses, and which properties on the response are readable.
Legal mode values are <code>"same-origin"</code>, <code>"no-cors"</code> (default)
and <code>"cors"</code>.</p>
<p>The <code>"same-origin"</code> mode is simple, if a request is made to another
origin with this mode set, the result is simply an error. You could use
this to ensure that
<br></br>a request is always being made to your origin.</p>
<div>
<div><pre><span>var</span> arbitraryUrl <span>=</span> document.<span>getElementById</span><span>(</span><span>"url-input"</span><span>)</span>.<span>value</span><span>;</span>
fetch<span>(</span>arbitraryUrl<span>,</span> <span>{</span> mode<span>:</span> <span>"same-origin"</span> <span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Response succeeded?"</span><span>,</span> res.<span>ok</span><span>)</span><span>;</span>
<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Please enter a same-origin URL!"</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<p>The <code>"no-cors"</code> mode captures what the web platform does by default
for scripts you import from CDNs, images hosted on other domains, and so
on. First, it prevents the method from being anything other than “HEAD”,
“GET” or “POST”. Second, if any ServiceWorkers intercept these requests,
they may not add or override any headers except for <a href="https://fetch.spec.whatwg.org/#simple-header">these</a>.
Third, JavaScript may not access any properties of the resulting Response.
This ensures that ServiceWorkers do not affect the semantics of the Web
and prevents security and privacy issues that could arise from leaking
data across domains.</p>
<p><code>"cors"</code> mode is what you’ll usually use to make known cross-origin
requests to access various APIs offered by other vendors. These are expected
to adhere to
<br></br>the <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS">CORS protocol</a>.
Only a <a href="https://fetch.spec.whatwg.org/#concept-filtered-response-cors">limited set</a> of
headers is exposed in the Response, but the body is readable. For example,
you could get a list of Flickr’s <a href="https://www.flickr.com/services/api/flickr.interestingness.getList.html">most interesting</a> photos
today like this:</p>
<div>
<div><pre><span>var</span> u <span>=</span> <span>new</span> URLSearchParams<span>(</span><span>)</span><span>;</span>
u.<span>append</span><span>(</span><span>'method'</span><span>,</span> <span>'flickr.interestingness.getList'</span><span>)</span><span>;</span>
u.<span>append</span><span>(</span><span>'api_key'</span><span>,</span> <span>'&lt;insert api key here&gt;'</span><span>)</span><span>;</span>
u.<span>append</span><span>(</span><span>'format'</span><span>,</span> <span>'json'</span><span>)</span><span>;</span>
u.<span>append</span><span>(</span><span>'nojsoncallback'</span><span>,</span> <span>'1'</span><span>)</span><span>;</span>
 
<span>var</span> apiCall <span>=</span> fetch<span>(</span><span>'https://api.flickr.com/services/rest?'</span> <span>+</span> u<span>)</span><span>;</span>
 
apiCall.<span>then</span><span>(</span><span>function</span><span>(</span>response<span>)</span> <span>{</span>
<span>return</span> response.<span>json</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>json<span>)</span> <span>{</span>
<span>// photo is a list of photos.</span>
<span>return</span> json.<span>photos</span>.<span>photo</span><span>;</span>
<span>}</span><span>)</span><span>;</span>
<span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>photos<span>)</span> <span>{</span>
photos.<span>forEach</span><span>(</span><span>function</span><span>(</span>photo<span>)</span> <span>{</span>
console.<span>log</span><span>(</span>photo.<span>title</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<p>You may not read out the “Date” header since Flickr does not allow it
via
<br></br><code>Access-Control-Expose-Headers</code>.</p>
<div>
<div><pre>response.<span>headers</span>.<span>get</span><span>(</span><span>"Date"</span><span>)</span><span>;</span> <span>// null</span></pre>
</div></div>
<p>The <code>credentials</code> enumeration determines if cookies for the other
domain are
<br></br>sent to cross-origin requests. This is similar to XHR’s <code>withCredentials</code>
<br></br>flag, but tri-valued as <code>"omit"</code> (default), <code>"same-origin"</code> and <code>"include"</code>.</p>
<p>The Request object will also give the ability to offer caching hints to
the user-agent. This is currently undergoing some <a href="https://github.com/slightlyoff/ServiceWorker/issues/585">security review</a>.
Firefox exposes the attribute, but it has no effect.</p>
<p>Requests have two read-only attributes that are relevant to ServiceWorkers
<br></br>intercepting them. There is the string <code>referrer</code>, which is
set by the UA to be
<br></br>the referrer of the Request. This may be an empty string. The other is
<br></br><code>context</code> which is a rather <a href="https://fetch.spec.whatwg.org/#requestcredentials">large enumeration</a> defining
what sort of resource is being fetched. This could be “image” if the request
is from an
&lt;img&gt; tag in the controlled document, “worker” if it is an attempt to load a
worker script, and so on. When used with the <code>fetch()</code> function,
it is “fetch”.</p>
<h2>Response</h2>
<p><code>Response</code> instances are returned by calls to <code>fetch()</code>.
They can also be created by JS, but this is only useful in ServiceWorkers.</p>
<p>We have already seen some attributes of Response when we looked at <code>fetch()</code>.
The most obvious candidates are <code>status</code>, an integer (default
value 200) and <code>statusText</code> (default value “OK”), which correspond
to the HTTP status code and reason. The <code>ok</code> attribute is just
a shorthand for checking that <code>status</code> is in the range 200-299
inclusive.</p>
<p><code>headers</code> is the Response’s Headers object, with guard “response”.
The <code>url</code> attribute reflects the URL of the corresponding request.</p>
<p>Response also has a <code>type</code>, which is “basic”, “cors”, “default”,
“error” or
<br></br>“opaque”.</p>
<ul><li><code>"basic"</code>: normal, same origin response, with all headers exposed
except
<br></br>“Set-Cookie” and “Set-Cookie2”.</li>
<li><code>"cors"</code>: response was received from a valid cross-origin request.
<a href="https://fetch.spec.whatwg.org/#concept-filtered-response-cors">Certain headers and the body</a> may be accessed.</li>
<li><code>"error"</code>: network error. No useful information describing
the error is available. The Response’s status is 0, headers are empty and
immutable. This is the type for a Response obtained from <code>Response.error()</code>.</li>
<li><code>"opaque"</code>: response for “no-cors” request to cross-origin
resource. <a href="https://fetch.spec.whatwg.org/#concept-filtered-response-opaque">Severely<br></br>
restricted</a>
</li>
</ul><p>The “error” type results in the <code>fetch()</code> Promise rejecting with
TypeError.</p>
<p>There are certain attributes that are useful only in a ServiceWorker scope.
The
<br></br>idiomatic way to return a Response to an intercepted request in ServiceWorkers
is:</p>
<div>
<div><pre>addEventListener<span>(</span><span>'fetch'</span><span>,</span> <span>function</span><span>(</span>event<span>)</span> <span>{</span>
event.<span>respondWith</span><span>(</span><span>new</span> Response<span>(</span><span>"Response body"</span><span>,</span> <span>{</span>
headers<span>:</span> <span>{</span> <span>"Content-Type"</span> <span>:</span> <span>"text/plain"</span> <span>}</span>
<span>}</span><span>)</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<p>As you can see, Response has a two argument constructor, where both arguments
are optional. The first argument is a body initializer, and the second
is a dictionary to set the <code>status</code>, <code>statusText</code> and <code>headers</code>.</p>
<p>The static method <code>Response.error()</code> simply returns an error
response. Similarly, <code>Response.redirect(url, status)</code> returns
a Response resulting in
<br></br>a redirect to <code>url</code>.</p>
<h2>Dealing with bodies</h2>
<p>Both Requests and Responses may contain body data. We’ve been glossing
over it because of the various data types body may contain, but we will
cover it in detail now.</p>
<p>A body is an instance of any of the following types.</p>
<ul><li><a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/ArrayBuffer">ArrayBuffer</a>
</li>
<li><a href="https://developer.mozilla.org/en-US/docs/Web/API/ArrayBufferView">ArrayBufferView</a> (Uint8Array
and friends)</li>
<li><a href="https://developer.mozilla.org/en-US/docs/Web/API/Blob">Blob</a>/
<a href="https://developer.mozilla.org/en-US/docs/Web/API/File">File</a>
</li>
<li>string</li>
<li><a href="https://url.spec.whatwg.org/#interface-urlsearchparams">URLSearchParams</a>
</li>
<li><a href="https://developer.mozilla.org/en-US/docs/Web/API/FormData">FormData</a> –
currently not supported by either Gecko or Blink. Firefox expects to ship
this in version 39 along with the rest of Fetch.</li>
</ul><p>In addition, Request and Response both offer the following methods to
extract their body. These all return a Promise that is eventually resolved
with the actual content.</p>
<ul><li><code>arrayBuffer()</code>
</li>
<li><code>blob()</code>
</li>
<li><code>json()</code>
</li>
<li><code>text()</code>
</li>
<li><code>formData()</code>
</li>
</ul><p>This is a significant improvement over XHR in terms of ease of use of
non-text data!</p>
<p>Request bodies can be set by passing <code>body</code> parameters:</p>
<div>
<div><pre><span>var</span> form <span>=</span> <span>new</span> FormData<span>(</span>document.<span>getElementById</span><span>(</span><span>'login-form'</span><span>)</span><span>)</span><span>;</span>
fetch<span>(</span><span>"/login"</span><span>,</span> <span>{</span>
method<span>:</span> <span>"POST"</span><span>,</span>
body<span>:</span> form
<span>}</span><span>)</span></pre>
</div></div>
<p>Responses take the first argument as the body.</p>
<div>
<div><pre><span>var</span> res <span>=</span> <span>new</span> Response<span>(</span><span>new</span> File<span>(</span><span>[</span><span>"chunk"</span><span>,</span> <span>"chunk"</span><span>]</span><span>,</span> <span>"archive.zip"</span><span>,</span>
<span>{</span> type<span>:</span> <span>"application/zip"</span> <span>}</span><span>)</span><span>)</span><span>;</span></pre>
</div></div>
<p>Both Request and Response (and by extension the <code>fetch()</code> function),
will try to intelligently <a href="https://fetch.spec.whatwg.org/#concept-bodyinit-extract">determine the content type</a>.
Request will also automatically set a “Content-Type” header if none is
set in the dictionary.</p>
<h3>Streams and cloning</h3>
<p>It is important to realise that Request and Response bodies can only be
read once! Both interfaces have a boolean attribute <code>bodyUsed</code> to
determine if it is safe to read or not.</p>
<div>
<div><pre><span>var</span> res <span>=</span> <span>new</span> Response<span>(</span><span>"one time use"</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
res.<span>text</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>v<span>)</span> <span>{</span>
console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
<span>}</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
 
res.<span>text</span><span>(</span><span>)</span>.<span>catch</span><span>(</span><span>function</span><span>(</span>e<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Tried to read already consumed Response"</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<p>This decision allows easing the transition to an eventual <a href="https://streams.spec.whatwg.org/">stream-based</a> Fetch
API. The intention is to let applications consume data as it arrives, allowing
for JavaScript to deal with larger files like videos, and perform things
like compression and editing on the fly.</p>
<p>Often, you’ll want access to the body multiple times. For example, you
can use the upcoming <a href="http://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#cache-objects">Cache API</a> to
store Requests and Responses for offline use, and Cache requires bodies
to be available for reading.</p>
<p>So how do you read out the body multiple times within such constraints?
The API provides a <code>clone()</code> method on the two interfaces. This
will return a clone of the object, with a new body. <code>clone()</code> MUST
be called before the body of the corresponding object has been used. That
is, <code>clone()</code> first, read later.</p>
<div>
<div><pre>addEventListener<span>(</span><span>'fetch'</span><span>,</span> <span>function</span><span>(</span>evt<span>)</span> <span>{</span>
<span>var</span> sheep <span>=</span> <span>new</span> Response<span>(</span><span>"Dolly"</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>sheep.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
<span>var</span> clone <span>=</span> sheep.<span>clone</span><span>(</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>clone.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
 
clone.<span>text</span><span>(</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>sheep.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
console.<span>log</span><span>(</span>clone.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
 
evt.<span>respondWith</span><span>(</span>cache.<span>add</span><span>(</span>sheep.<span>clone</span><span>(</span><span>)</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>e<span>)</span> <span>{</span>
<span>return</span> sheep<span>;</span>
<span>}</span><span>)</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div></div>
<h2>Future improvements</h2>
<p>Along with the transition to streams, Fetch will eventually have the ability
to abort running <code>fetch()</code>es and some way to report the progress
of a fetch. These are provided by XHR, but are a little tricky to fit in
the Promise-based nature of the Fetch API.</p>
<p>You can contribute to the evolution of this API by participating in discussions
on the <a href="https://whatwg.org/mailing-list">WHATWG mailing list</a> and
in the issues in the <a href="https://www.w3.org/Bugs/Public/buglist.cgi?product=WHATWG&amp;component=Fetch&amp;resolution=---">Fetch</a> and
<a href="https://github.com/slightlyoff/ServiceWorker/issues">ServiceWorker</a> specifications.</p>
<p>For a better web!</p>
<p><em>The author would like to thank Andrea Marchesini, Anne van Kesteren and Ben<br></br>
Kelly for helping with the specification and implementation.</em>
</p>
</article></div>

@ -0,0 +1,8 @@
{
"Author": "Dublin Core property author",
"Direction": null,
"Excerpt": "Dublin Core property description",
"Image": null,
"Title": "Dublin Core property title",
"SiteName": null
}

@ -0,0 +1,18 @@
<article>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

@ -0,0 +1,45 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Title Element</title>
<meta name="title" content="Meta name title"/>
<meta name="og:title" content="Open Graph name title"/>
<meta name="twitter:title" content="Twitter name title"/>
<meta name="DC.title" content="Dublin Core name title"/>
<meta property="dc:title" content="Dublin Core property title"/>
<meta property="twitter:title" content="Twitter property title"/>
<meta property="og:title" content="Open Graph property title"/>
<meta name="author" content="Meta name author"/>
<meta name="DC.creator" content="Dublin Core name author"/>
<meta property="dc:creator" content="Dublin Core property author"/>
<meta name="description" content="Meta name description"/>
<meta name="og:description" content="Open Graph name description"/>
<meta name="twitter:description" content="Twitter name description"/>
<meta name="DC.description" content="Dublin Core name description"/>
<meta property="dc:description" content="Dublin Core property description"/>
<meta property="twitter:description" content="Twitter property description"/>
<meta property="og:description" content="Open Graph property description"/>
</head>
<body>
<article>
<h1>Test document title</h1>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>
</body>
</html>

@ -0,0 +1,8 @@
{
"Author": "Creator Name",
"Direction": null,
"Excerpt": "Preferred description",
"Image": "http:\/\/fakehost.com\/image.jpg",
"Title": "Preferred title",
"SiteName": null
}

@ -0,0 +1,18 @@
<article>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

@ -0,0 +1,36 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Title Element</title>
<meta property="x:title dc:title" content="Preferred title"/>
<meta property="og:title twitter:title" content="A title"/>
<meta property="dc:creator twitter:site_name" content="Creator Name"/>
<meta name="author" content="FAIL"/>
<meta property="og:description x:description twitter:description" content="A description"/>
<meta property="dc:description og:description" content="Preferred description"/>
<meta property="twitter:image og:image" content="http://fakehost.com/image.jpg"/>
<meta name="description" content="FAIL"/>
</head>
<body>
<article>
<h1>Test document title</h1>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>
</body>
</html>

@ -0,0 +1,3 @@
[
"https:\/\/www.aclu.org\/sites\/default\/files\/styles\/metatag_og_image_1200x630\/public\/field_share_image\/web18-facebook-socialshare-1200x628-v02.png?itok=p77cQjOm"
]

@ -0,0 +1,8 @@
{
"Author": "By Daniel Kahn Gillmor, Senior Staff Technologist, ACLU Speech, Privacy, and Technology Project",
"Direction": "ltr",
"Excerpt": "I don't use Facebook. I'm not technophobic — I'm a geek. I've been using email since the early 1990s, I have accounts on hundreds of services around the net, and I do software development and internet protocol design both for work and for fun. I believe that a globe-spanning communications network like the internet can be a positive social force, and I publish much of my own work on the open web.",
"Image": "https:\/\/www.aclu.org\/sites\/default\/files\/styles\/metatag_og_image_1200x630\/public\/field_share_image\/web18-facebook-socialshare-1200x628-v02.png?itok=p77cQjOm",
"Title": "Facebook Is Tracking Me Even Though I’m Not on Facebook",
"SiteName": "American Civil Liberties Union"
}

@ -0,0 +1,124 @@
<div>
<p>
I don't use Facebook. I'm not technophobic — I'm a geek. I've been using email since the early 1990s, I have accounts on hundreds of services around the net, and I do software development and internet protocol design both for work and for fun. I believe that a globe-spanning communications network like the internet can be a positive social force, and I publish much of my own work on the open web.
</p>
<p>
But Facebook and other massive web companies represent a strong push toward unaccountable centralized social control, which I think makes our society more unequal and more unjust. The Cambridge Analytica scandal is one instance of this long-running problem with what I call the "surveillance economy." I don't want to submit to these power structures, and I don’t want my presence on such platforms to serve as bait that lures other people into the digital panopticon.
</p>
<p>
But while I've never "opted in" to Facebook or any of the other big social networks, Facebook still has a detailed profile that can be used to target me. I've never consented to having Facebook collect my data, which can be used to draw very detailed inferences about my life, my habits, and my relationships. As we aim to take Facebook to task for its breach of user trust, we need to think about what its capabilities imply for society overall. After all, if you do #deleteFacebook, you'll find yourself in my shoes: non-consenting, but still subject to Facebook’s globe-spanning surveillance and targeting network.
</p>
<p>
There are at least two major categories of information available to Facebook about non-participants like me: information from other Facebook users, and information from sites on the open web.
</p>
<h3>
<strong>Information from other Facebook users</strong>
</h3>
<p>
When you sign up for Facebook, it encourages you to upload your list of contacts so that the site can "find your friends." Facebook uses this contact information to learn about people, even if those people don't agree to participate. It also links people together based on who they know, even if the shared contact hasn't agreed to this use.
</p>
<p>
For example, I received an email from Facebook that lists the people who have all invited me to join Facebook: my aunt, an old co-worker, a friend from elementary school, etc. This email includes names and email addresses — including my own name — and at least one <a href="https://en.wikipedia.org/wiki/Web_bug">web bug</a> designed to identify me to Facebook’s web servers when I open the email. Facebook records this group of people as my contacts, even though I've never agreed to this kind of data collection.
</p>
<p>
Similarly, I'm sure that I'm in some photographs that someone has uploaded to Facebook — and I'm probably tagged in some of them. I've never agreed to this, but Facebook could still be keeping track.
</p>
<p>
So even if you decide you need to join Facebook, remember that you might be giving the company information about someone else who didn't agree to be part of its surveillance platform.
</p>
<h3>
<strong>Information from sites on the open Web</strong>
</h3>
<p>
Nearly every website that you visit that has a "Like" button is actually encouraging your browser to tell Facebook about your browsing habits. Even if you don't click on the "Like" button, displaying it requires your browser to send a request to Facebook's servers for the "Like" button itself. That request includes <a href="https://en.wikipedia.org/wiki/HTTP_referer">information</a> mentioning the name of the page you are visiting and any Facebook-specific <a href="https://en.wikipedia.org/wiki/HTTP_cookie">cookies</a> your browser might have collected. (See <a href="https://www.facebook.com/help/186325668085084">Facebook's own description of this process</a>.) This is called a "third-party request."
</p>
<p>
This makes it possible for Facebook to create a detailed picture of your browsing history — even if you've never even visited Facebook directly, let alone signed up for a Facebook account.
</p>
<p>
Think about most of the web pages you've visited — how many of them <em>don't</em> have a "Like" button? If you administer a website and you include a "Like" button on every page, you're helping Facebook to build profiles of your visitors, even those who have opted out of the social network. Facebook’s <a href="https://developers.facebook.com/docs/plugins/">“Share” buttons</a> on other sites — along with <a href="https://www.facebook.com/business/learn/facebook-ads-pixel">other tools</a> — work a bit differently from the “Like” button, but do effectively the same thing.
</p>
<p>
The profiles that Facebook builds on non-users don't necessarily include so-called "personally identifiable information" (PII) like names or email addresses. But they do include fairly unique patterns. Using <a href="https://dev.chromium.org/for-testers/providing-network-details">Chromium's NetLog dumping</a>, I performed a simple five-minute browsing test last week that included visits to various sites — but not Facebook. In that test, the PII-free data that was sent to Facebook included information about which news articles I was reading, my dietary preferences, and my hobbies.
</p>
<p>
Given the precision of this kind of mapping and targeting, "PII" isn’t necessary to reveal my identity. How many vegans examine specifications for computer hardware from the ACLU's offices while reading about Cambridge Analytica? Anyway, if Facebook combined that information with the "web bug" from the email mentioned above — which <em>is</em> clearly linked to my name and e-mail address — no guesswork would be required.
</p>
<p>
I'd be shocked if Facebook were not connecting those dots given the goals <a href="https://www.facebook.com/about/privacy/cookies">they claim for data collection</a>:
</p>
<blockquote>
<p>
We use the information we have to improve our advertising and measurement systems so we can show you relevant ads on and off our Services and measure the effectiveness and reach of ads and services.
</p>
</blockquote>
<p>
This is, in essence, exactly what Cambridge Analytica did.
</p>
<h3>
<strong>Consent</strong>
</h3>
<p>
Facebook and other tech companies often deflect accusations against excessive data collection by arguing "consent" — that they harvest and use data with the consent of the users involved.
</p>
<p>
But even if we accept that clicking through a "Terms of Service" that <a href="https://tosdr.org/">no one reads</a> can actually constitute true consent, even if we ignore the fact that these terms are overwhelmingly one-sided and non-negotiable, and even if we accept that it's meaningful for people to give consent when sharing data about other people who may have also opted in — what is the recourse for someone who has not opted into these systems at all?
</p>
<p>
Are those of us who have explicitly avoided agreeing to the Facebook terms of service simply fair game for an industry-wide surveillance and targeting network?
</p>
<h3>
<strong>Privilege</strong>
</h3>
<p>
I don’t mean to critique people who have created a Facebook profile or suggest they deserve whatever they get.
</p>
<p>
My ability to avoid Facebook comes from privilege — I have existing social contacts with whom I know how to stay in touch without using Facebook's network. My job does not require that I use Facebook. I can afford the time and expense to communicate with my electoral representatives and political allies via other channels.
</p>
<p>
Many people do not have these privileges and are compelled to "opt in" on Facebook's non-negotiable terms.
</p>
<p>
Many journalists, organizers, schools, politicians, and others who have good reasons to oppose Facebook's centralized social control feel compelled by Facebook's reach and scale to participate in their practices, even those we know to be harmful. That includes the ACLU.
</p>
<p>
Privacy should not be a luxury good, and while I'm happy to encourage people to opt out of these subtle and socially fraught arrangements, I do not argue that anyone who has signed up has somehow relinquished concerns about their privacy. We need to evaluate privacy concerns in their full social contexts. These are not problems that can be resolved on an individual level, because of the interpersonal nature of much of this data and the complexities of the tradeoffs involved.
</p>
<h3>
<strong>Technical countermeasures</strong>
</h3>
<p>
While they may not solve the problem, there are some technical steps people can take to limit the scope of these surveillance practices. For example, some web browsers do not send "third-party cookies" by default, or <a href="https://wiki.mozilla.org/Thirdparty">they scope cookies</a> so that centralized surveillance doesn't get a single view of one user. The most privacy-preserving modern browser is <a href="https://www.torproject.org/">the Tor Browser</a>, which everyone should have installed and available, even if it's not the browser they choose to use every day. It limits the surveillance ability of systems that you have not signed up for to track you as you move around the web.
</p>
<p>
You can also modify some browsers — for example, with plug-ins for <a href="https://requestpolicycontinued.github.io/">Firefox</a> and <a href="https://chrome.google.com/webstore/detail/umatrix/ogfcmafjalglgifnmanfmnieipoejdcf">Chrome</a> — so that they <a href="https://addons.mozilla.org/en-US/firefox/addon/umatrix/">do not send third-party</a> <a href="https://requestpolicycontinued.github.io/">requests at all</a>. Firefox is also exploring even more <a href="https://addons.mozilla.org/en-US/firefox/addon/multi-account-containers/">privacy-preserving techniques</a><a href="https://addons.mozilla.org/en-US/firefox/addon/multi-account-containers/">.</a>
</p>
<p>
It can’t be denied, though, that these tools are harder to use than the web browsers most people are accustomed to, and they create barriers to some online activities. (For example, logging in to <a href="https://offcampushousing.uconn.edu/login">some sites</a> and accessing some <a href="https://filestore.community.support.microsoft.com/api/images/0253d8fb-b050-401a-834d-9d80a99c0b12">web applications</a> is impossible without third-party cookies.)
</p>
<p>
Some website operators take their visitors' privacy more seriously than others, by reducing the amount of third-party requests. For example, it's possible to display "share on Facebook" or "Like" buttons without sending user requests to Facebook in the first place. The ACLU's own website does this because we believe that the right to read with privacy is a fundamental protection for civic discourse.
</p>
<p>
If you are responsible for running a website, try browsing it with a third-party-blocking extension turned on. Think about how much information you're requiring your users to send to third parties as a condition for using your site. If you care about being a good steward of your visitors' data, you can re-design your website to reduce this kind of leakage.
</p>
<h3>
<strong>Opting out?</strong>
</h3>
<p>
Some advertisers claim that you can "opt out" of their targeted advertising, and even offer <a href="http://optout.aboutads.info/">a centralized place meant to help you do so</a>. However, my experience with these tools isn't a positive one. They don't appear to work all of the time. (In a recent experiment I conducted, two advertisers’ opt-out mechanisms failed to take effect.) And while advertisers claim to allow the user to opt out of "interest-based ads," it's not clear that the opt-outs govern data collection itself, rather than just the use of the collected data for displaying ads. Moreover, opting out on their terms requires the use of third-party cookies, thereby enabling another mechanism that other advertisers can then exploit.
</p>
<p>
It's also not clear how they function over time: How frequently do I need to take these steps? Do they expire? How often should I check back to make sure I’m still opted out? I'd much prefer an approach requiring me to opt <em>in</em> to surveillance and targeting.
</p>
<h3>
<strong>Fix the surveillance economy, not just Facebook</strong>
</h3>
<p>
These are just a few of the mechanisms that enable online tracking. Facebook is just one culprit in this online "surveillance economy," albeit a massive one — the company owns <a href="https://www.instagram.com/">Instagram</a>, <a href="https://atlassolutions.com/">Atlas</a>, <a href="https://www.whatsapp.com/">WhatsApp</a>, and dozens of other internet and technology companies and services. But it’s not the only player in this space. Google’s business model also relies on this kind of surveillance, and there are dozens of smaller players as well.
</p>
<p>
As we work to address the fallout from the current storm around Facebook and Cambridge Analytica, we can't afford to lose sight of these larger mechanisms at play. Cambridge Analytica's failures and mistakes are inherent to Facebook's business model. We need to seriously challenge the social structures that encourage people to opt in to this kind of surveillance. At the same time, we also need to protect those of us who manage to opt out.
</p>
</div>

File diff suppressed because one or more lines are too long

@ -0,0 +1,3 @@
[
"http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-640x426.jpg"
]

@ -0,0 +1,8 @@
{
"Author": "by Dan Goodin - Apr 16, 2015 8:02 pm UTC",
"Direction": null,
"Excerpt": "Two-year-old bug exposes thousands of servers to crippling attack.",
"Image": "http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-640x426.jpg",
"Title": "Just-released Minecraft exploit makes it easy to crash game servers",
"SiteName": "Ars Technica"
}

@ -0,0 +1,44 @@
<div itemprop="articleBody">
<figure><img height="331" src="http://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x426.jpg" width="640"></img><figcaption>
</figcaption></figure><p>A flaw in the wildly popular online game <em>Minecraft</em> makes it easy for just about anyone to crash the server hosting the game, according to a computer programmer who has released proof-of-concept code that exploits the vulnerability.</p>
<p>"I thought a lot before writing this post," Pakistan-based developer Ammar Askar wrote in a <a href="http://blog.ammaraskar.com/minecraft-vulnerability-advisory">blog post published Thursday</a>, 21 months, he said, after privately reporting the bug to <em>Minecraft</em> developer Mojang. "On the one hand I don't want to expose thousands of servers to a major vulnerability, yet on the other hand Mojang has failed to act on it."</p>
<p>The bug resides in the <a href="https://github.com/ammaraskar/pyCraft">networking internals of the <em>Minecraft </em>protocol</a>. It allows the contents of inventory slots to be exchanged, so that, among other things, items in players' hotbars are displayed automatically after logging in. <em>Minecraft</em> items can also store arbitrary metadata in a file format known as <a href="http://wiki.vg/NBT">Named Binary Tag (NBT)</a>, which allows complex data structures to be kept in hierarchical nests. Askar has released <a href="https://github.com/ammaraskar/pyCraft/tree/nbt_exploit">proof-of-concept attack code</a> he said exploits the vulnerability to crash any server hosting the game. Here's how it works.</p>
<blockquote>
<p>The vulnerability stems from the fact that the client is allowed to send the server information about certain slots. This, coupled with the NBT format’s nesting allows us to <em>craft</em> a packet that is incredibly complex for the server to deserialize but trivial for us to generate.</p>
<p>In my case, I chose to create lists within lists, down to five levels. This is a json representation of what it looks like.</p>
<div> <pre><code data-lang="javascript"><span>rekt</span><span>:</span> <span>{</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>]</span>
<span>list</span><span>:</span> <span>[</span>
<span>]</span>
<span>list</span><span>:</span> <span>[</span>
<span>]</span>
<span>list</span><span>:</span> <span>[</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>}</span></code></pre> </div>
<p>The root of the object, <code>rekt</code>, contains 300 lists. Each list has a list with 10 sublists, and each of those sublists has 10 of their own, up until 5 levels of recursion. That’s a total of <code>10^5 * 300 = 30,000,000</code> lists.</p>
<p>And this isn’t even the theoretical maximum for this attack. Just the nbt data for this payload is 26.6 megabytes. But luckily Minecraft implements a way to compress large packets, lucky us! zlib shrinks down our evil data to a mere 39 kilobytes.</p>
<p>Note: in previous versions of Minecraft, there was no protocol wide compression for big packets. Previously, NBT was sent compressed with gzip and prefixed with a signed short of its length, which reduced our maximum payload size to <code>2^15 - 1</code>. Now that the length is a varint capable of storing integers up to <code>2^28</code>, our potential for attack has increased significantly.</p>
<p>When the server will decompress our data, it’ll have 27 megs in a buffer somewhere in memory, but that isn’t the bit that’ll kill it. When it attempts to parse it into NBT, it’ll create java representations of the objects meaning suddenly, the sever is having to create several million java objects including ArrayLists. This runs the server out of memory and causes tremendous CPU load.</p>
<p>This vulnerability exists on almost all previous and current Minecraft versions as of 1.8.3, the packets used as attack vectors are the <a href="http://wiki.vg/Protocol#Player_Block_Placement">0x08: Block Placement Packet</a> and <a href="http://wiki.vg/Protocol#Creative_Inventory_Action">0x10: Creative Inventory Action</a>.</p>
<p>The fix for this vulnerability isn’t exactly that hard, the client should never really send a data structure as complex as NBT of arbitrary size and if it must, some form of recursion and size limits should be implemented.</p>
<p>These were the fixes that I recommended to Mojang 2 years ago.</p>
</blockquote>
<p>Ars is asking Mojang for comment and will update this post if company officials respond.</p>
</div>

@ -0,0 +1,765 @@
<!DOCTYPE html>
<!--[if lt IE 7]> <html lang="en-us" class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]> <html lang="en-us" class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]> <html lang="en-us" class="no-js ie8 lt-ie9"> <![endif]-->
<!--[if IE 9]> <html lang="en-us" class="no-js ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html lang="en-us">
<!--<![endif]-->
<head>
<title>Just-released Minecraft exploit makes it easy to crash game servers | Ars Technica</title>
<script type="text/javascript">
ars = {
"ASSETS": "http:\/\/cdn.arstechnica.net\/wp-content\/themes\/arstechnica\/assets",
"HOME_URL": "http:\/\/arstechnica.com",
"LOGIN_URL": "https:\/\/arstechnica.com\/services\/login-desktop.html?v=1",
"CIVIS": "\/civis",
"THEME": "light",
"VIEW": "grid",
"MOBILE": false,
"PREMIER": false,
"LOGGED": false,
"ENV": "production",
"AD": {
"kw": ["security", "int"],
"zone": "int",
"queue": []
},
"TOTAL": 68014,
"UNREAD": 0,
"RECENT": [659465, 659425, 659391, 659203, 659339, 659209, 659151, 659207, 659257, 659153, 657603, 659157, 659089, 659105, 658987, 658981, 658367, 658019, 658841, 658609, 658117, 658553, 658455, 657769, 658395],
"LOGINS": true,
"CROSS": false,
"GEOALERTS": true,
"PARSELY": "arstechnica.com",
"COMMENTS": false,
"HOMEPAGE": false,
"COUNTRY": "us",
"READY": [],
"SHOW_ADS": true,
"IMG_PROXY": "https:\/\/cdn.arstechnica.net\/i\/",
"CATEGORY": "security"
};
</script>
<!--[if lte IE 8]><script type="text/javascript" src="http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/js/modernizr/modernizr.js"></script><![endif]-->
<link rel="stylesheet" type="text/css" media="all" href="http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/css/ars.min.55e632421d8225142fe8df15cdfe2a20.css"/>
<link rel="alternate" type="application/rss+xml" href="http://feeds.arstechnica.com/arstechnica/index/"/>
<link rel="shortcut icon" href="https://cdn.arstechnica.net/favicon.ico"/>
<link rel="icon" type="image/x-icon" href="https://cdn.arstechnica.net/favicon.ico"/>
<link rel="apple-touch-icon" sizes="180x180" href="http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/images/ars-ios-icon.png"/>
<link rel="icon" sizes="192x192" href="http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/images/material-ars.png"/>
<meta name="application-name" content="Ars Technica"/>
<meta name="msapplication-starturl" content="http://arstechnica.com/"/>
<meta name="msapplication-tooltip" content="Ars Technica: Serving the technologist for 1.2 decades"/>
<meta name="msapplication-task" content="name=News;action-uri=http://arstechnica.com/;icon-uri=https://cdn.arstechnica.net/favicon.ico"/>
<meta name="msapplication-task" content="name=Features;action-uri=http://arstechnica.com/features/;icon-uri=https://cdn.arstechnica.net/ie-jump-menu/jump-features.ico"/>
<meta name="msapplication-task" content="name=OpenForum;action-uri=http://arstechnica.com/civis/;icon-uri=https://cdn.arstechnica.net/ie-jump-menu/jump-forum.ico"/>
<meta name="msapplication-task" content="name=Subscribe;action-uri=http://arstechnica.com/subscriptions/;icon-uri=https://cdn.arstechnica.net/ie-jump-menu/jump-subscribe.ico"/>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<meta name="advertising" content="ask"/>
<meta property="fb:admins" content="592156917"/>
<meta name="format-detection" content="telephone=no"/>
<meta name="theme-color" content="#000000"/>
<meta name="viewport" content="width=1020"/>
<!-- cache hit 459:single/meta:b3538aec37c1a165d2b4b62bd58e56e3 -->
<meta name="parsely-page" content="{&quot;title&quot;:&quot;Just-released Minecraft exploit makes it easy to crash game servers&quot;,&quot;link&quot;:&quot;http:\/\/arstechnica.com\/security\/2015\/04\/16\/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers\/&quot;,&quot;type&quot;:&quot;post&quot;,&quot;author&quot;:&quot;Dan Goodin&quot;,&quot;post_id&quot;:648287,&quot;pub_date&quot;:&quot;2015-04-16T20:02:01Z&quot;,&quot;section&quot;:&quot;Risk Assessment&quot;,&quot;tags&quot;:[&quot;denial-of-service-attack&quot;,&quot;exploits&quot;,&quot;minecraft&quot;,&quot;vulnerabilities&quot;,&quot;type: report&quot;],&quot;image_url&quot;:&quot;http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-150x150.jpg&quot;}"/>
<meta name="parsely-metadata" content="{&quot;type&quot;:&quot;report&quot;,&quot;title&quot;:&quot;Just-released Minecraft exploit makes it easy to crash game servers&quot;,&quot;post_id&quot;:648287,&quot;lower_deck&quot;:&quot;Two-year-old bug exposes thousands of servers to crippling attack.&quot;,&quot;image_url&quot;:&quot;http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-150x150.jpg&quot;,&quot;listing_image_url&quot;:&quot;http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-300x150.jpg&quot;}"/>
<link rel="canonical" href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/"/>
<link rel="shorturl" href="http://ars.to/1CSWnf5"/>
<meta name="description" content="Two-year-old bug exposes thousands of servers to crippling attack."/>
<meta name="twitter:card" content="summary_large_image"/>
<meta name="twitter:url" content="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/"/>
<meta name="twitter:title" content="Just-released Minecraft exploit makes it easy to crash game servers"/>
<meta name="twitter:description" content="Two-year-old bug exposes thousands of servers to crippling attack."/>
<meta name="twitter:site" content="@arstechnica"/>
<meta name="twitter:domain" content="arstechnica.com"/>
<meta property="og:site_name" content="Ars Technica"/>
<meta name="twitter:image:src" content="http://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x426.jpg"/>
<meta name="twitter:image:width" content="640"/>
<meta name="twitter:image:height" content="426"/>
<meta name="twitter:creator" content="@dangoodin001"/>
<meta property="og:url" content="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/"/>
<meta property="og:title" content="Just-released Minecraft exploit makes it easy to crash game servers"/>
<meta property="og:image" content="http://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x426.jpg"/>
<meta property="og:description" content="Two-year-old bug exposes thousands of servers to crippling attack."/>
<meta property="og:type" content="article"/>
<!-- cache hit 459:single/header:b3538aec37c1a165d2b4b62bd58e56e3 -->
<script type="text/javascript" src="http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/js/omniture/mbox.js"></script>
</head>
<body class="single single-post postid-648287 single-format-standard grid-view light blog-us">
<div id="container">
<header id="masthead">
<aside id="ad-top">
<div id="topBanner728x90_frame"></div>
<script type="text/javascript">
ars.AD.queue.push(['topBanner', {
sz: '728x90',
kws: [],
collapse: true
}]);
</script>
</aside>
<h1><a href="http://arstechnica.com"><em>Ars</em>Technica</a></h1>
<div id="profile">
<!-- cache hit 459:header/site-toggle:f8ff57a97275618649c08b2cce8f06a6 -->
<ul class="site-toggle">
<li class="site-1 selected"><a href="http://arstechnica.com/?return">Ars Technica</a></li>
<li class="site-3"><a href="http://arstechnica.co.uk">Ars Technica UK</a></li>
</ul> <a href="/civis/ucp.php?mode=register" rel="nofollow">Register</a> <a id="login" href="http://arstechnica.com/civis/ucp.php?mode=login&amp;return_to=http%3A%2F%2Farstechnica.com%2Fsecurity%2F2015%2F04%2F16%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F" rel="nofollow">Log in</a> </div>
<nav id="primary">
<ul>
<li id="home-icon"> <a href="/"><span>Home</span></a> </li>
<li class="has-children"> <a href="#">Main Menu</a>
<div id="main-menu" class="dropdown">
<div id="sections">
<ul class="cat-list">
<li class="top-row even">
<a class="cat-link" href="/information-technology"> <span class="cat-name">Information Technology</span> <span class="subheading cat-desc">Technology Lab</span> </a>
</li>
<li class="top-row odd">
<a class="cat-link" href="/gadgets"> <span class="cat-name">Product News &amp; Reviews</span> <span class="subheading cat-desc">Gear &amp; Gadgets</span> </a>
</li>
<li class="even">
<a class="cat-link" href="/business"> <span class="cat-name">Business of Technology</span> <span class="subheading cat-desc">Ministry of Innovation</span> </a>
</li>
<li class="odd">
<a class="cat-link" href="/security"> <span class="cat-name">Security &amp; Hacktivism</span> <span class="subheading cat-desc">Risk Assessment</span> </a>
</li>
<li class="even">
<a class="cat-link" href="/tech-policy"> <span class="cat-name">Civilization &amp; Discontents</span> <span class="subheading cat-desc">Law &amp; Disorder</span> </a>
</li>
<li class="odd">
<a class="cat-link" href="/apple"> <span class="cat-name">The Apple Ecosystem</span> <span class="subheading cat-desc">Infinite Loop</span> </a>
</li>
<li class="even">
<a class="cat-link" href="/gaming"> <span class="cat-name">Gaming &amp; Entertainment</span> <span class="subheading cat-desc">Opposable Thumbs</span> </a>
</li>
<li class="odd">
<a class="cat-link" href="/science"> <span class="cat-name">Science &amp; Exploration</span> <span class="subheading cat-desc">The Scientific Method</span> </a>
</li>
<li class="even">
<a class="cat-link" href="/cars"> <span class="cat-name">All Things Automotive</span> <span class="subheading cat-desc">Cars Technica</span> </a>
</li>
</ul>
</div>
<aside class="drop-extras">
<style type="text/css">
#layout-swap {
margin-top: -6px;
color: #808f95
}
</style>
<aside id="layout-swap"> <span class="subheading">Layout:</span>
<ul>
<li class="grid active"> <a href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?view=grid"><span>Grid View</span></a> </li>
<li class="article"> <a href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?view=archive"><span>Article View</span></a> </li>
</ul>
</aside>
<h2 class="subheading notched">Site Theme</h2>
<ul id="theme-switch">
<li class="light active"> <a href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?theme=light"><span class="subheading">Dark on light</span></a> </li>
<li class="dark "> <a href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?theme=dark"><span class="subheading">Light on dark</span></a> </li>
</ul>
<div id="explore-ars">
<h2 class="subheading notched">Explore Ars</h2>
<ul>
<!-- <li><a href="/reviews/">Reviews</a></li> -->
<li><a href="/video/">Video</a></li>
<li><a href="/staff/">Staff Blogs</a></li>
<li><a href="/features/">Feature Archive</a></li>
<li><a href="/staff-directory/">Staff Directory</a></li>
<li><a href="/contact-us/">Contact Us</a></li>
</ul>
</div>
<div id="featured-disciplines">
<h2 class="subheading notched">Featured Disciplines</h2>
<ul>
<li><a href="/discipline/photography/">Photography</a></li>
<li><a href="/discipline/productivity/">Productivity</a></li>
<li><a href="/discipline/cloud-2/">Cloud</a></li>
<!-- <li><a href="/discipline/gadgets-3/">Gadgets</a></li> -->
<li><a href="/discipline/tablets-2/">Tablets</a></li>
</ul>
</div>
</aside>
</div>
</li>
<li class="has-children" id="my-stories"> <a href="#">My Stories: <span class="unread-count">0</span></a>
<div class="dropdown">
<nav class="my-stories-nav first">
<h2 class="subheading notched">New Since Last Visit <span class="unread-count"></span></h2>
<ol class="new-stories with-numbers"></ol>
<footer><a class="subheading" href="#" id="more-unread-stories">See more news stories</a></footer>
</nav>
<nav class="my-stories-nav middle">
<h2 class="subheading notched">We Recommend</h2>
<ol class="recommendations"></ol>
</nav>
<nav class="my-stories-nav">
<h2 class="subheading notched">My Discussions</h2>
<p class="discussions disabled">Log in to track your discussions.</p>
</nav>
</div>
</li>
<li class="no-children"><a href="http://arstechnica.com/civis/">Forums</a></li>
<li class="no-children subscribe"> <a href="/subscriptions/">Subscribe</a> </li>
<li class="no-children"><a href="/jobs/">Jobs</a></li>
<li class="no-children"><a href="/feature-series/chasing-brilliance/">Ars Consortium</a></li>
<li id="search-container" class="right inactive">
<a id="search-switch" href="/search/"></a>
<form action="/search/" method="GET" id="search_form">
<input type="hidden" name="ie" value="UTF-8"/>
<input type="text" name="q" id="hdr_search_input" value=""/>
<input type="submit" value=""/> </form>
<style type="text/css">
table.gstl_50.gssb_c {
top: 30px !important;
left: 0 !important;
width: 100% !important;
}
</style>
</li>
</ul>
</nav>
</header>
<section id="content" class="clearfix">
<!-- cache hit 459:home/toppost:f3fda06d4fb35e8aa360e369ff702613 -->
<h1 id="archive-head" class="subheading thick-divide-bottom">
<a href="http://arstechnica.com/security/"> <span class="archive-name">Risk Assessment</span>
<span class="divider"> / </span>
<span class="archive-desc">Security &amp; Hacktivism</span>
</a></h1>
<script type="text/javascript">
ars.ARTICLE = {
"url": "http:\/\/arstechnica.com\/security\/2015\/04\/16\/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers\/",
"short_url": "http:\/\/ars.to\/1CSWnf5",
"title": "Just-released Minecraft exploit makes it easy to crash game servers",
"author": 329388,
"id": 648287,
"topic": 1280621,
"pages": 1,
"current_page": 1,
"superscroll": false,
"promoted": [],
"single_page": false,
"comments": 75,
"fullwidth": false
};
</script>
<article itemscope="" itemtype="http://schema.org/NewsArticle" class="standalone">
<header>
<h1 class="heading" itemprop="headline">Just-released <i>Minecraft</i> exploit makes it easy to crash game servers</h1>
<h2 class="standalone-deck" itemprop="description">Two-year-old bug exposes thousands of servers to crippling attack.</h2>
<div class="post-meta">
<p class="byline" itemprop="author creator" itemscope="" itemtype="http://schema.org/Person"> by <a itemprop="url" href="http://arstechnica.com/author/dan-goodin/" rel="author"><span itemprop="name">Dan Goodin</span></a> - <span class="date" data-time="1429214521">Apr 16, 2015 8:02 pm UTC</span> </p>
<div class="corner-info">
<ul class="share-buttons">
<li class="share-facebook">
<a href="https://www.facebook.com/sharer.php?u=http%3A%2F%2Farstechnica.com%2Fsecurity%2F2015%2F04%2F16%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F" target="_blank" data-dialog="400:368"> <span class="share-text">Share</span> </a>
</li>
<li class="share-twitter">
<a href="https://twitter.com/share?text=Just-released+Minecraft+exploit+makes+it+easy+to+crash+game+servers&amp;url=http%3A%2F%2Fars.to%2F1CSWnf5" target="_blank" data-dialog="364:250"> <span class="share-text">Tweet</span> </a>
</li>
<li class="share-google">
<a href="https://plus.google.com/share?url=http%3A%2F%2Farstechnica.com%2Fsecurity%2F2015%2F04%2F16%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F" target="_blank" data-dialog="485:600"> <span class="share-text">Google</span> </a>
</li>
<li class="share-reddit">
<a href="https://www.reddit.com/submit?url=http%3A%2F%2Farstechnica.com%2Fsecurity%2F2015%2F04%2F16%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F&amp;title=Just-released+Minecraft+exploit+makes+it+easy+to+crash+game+servers" target="_blank"> <span class="share-text">Reddit</span> </a>
</li>
</ul> <a title="51 posters participating" class="comment-count" href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?comments=1"><span>75</span></a> </div>
</div>
</header>
<section id="article-guts">
<div itemprop="articleBody" class="article-content clearfix">
<figure class="intro-image image center full-width" style="width:640px"> <img src="http://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x426.jpg" width="640" height="331"/>
<figcaption class="caption">
<div class="caption-credit"> <a rel="nofollow" href="https://en.wikipedia.org/wiki/Kernel_panic#/media/File:Kernel-panic.jpg">Kevin</a> </div>
</figcaption>
</figure>
<!-- cache hit 459:single/related:1ad28a5dc0a24868be6b031b5fdecb2e -->
<!-- empty -->
<p>A flaw in the wildly popular online game <em>Minecraft</em> makes it easy for just about anyone to crash the server hosting the game, according to a computer programmer who has released proof-of-concept code that exploits the vulnerability.</p>
<p>"I thought a lot before writing this post," Pakistan-based developer Ammar Askar wrote in a <a href="http://blog.ammaraskar.com/minecraft-vulnerability-advisory">blog post published Thursday</a>, 21 months, he said, after privately reporting the bug to <em>Minecraft</em> developer Mojang. "On the one hand I don't want to expose thousands of servers to a major vulnerability, yet on the other hand Mojang has failed to act on it."</p>
<p>The bug resides in the <a href="https://github.com/ammaraskar/pyCraft">networking internals of the <em>Minecraft </em>protocol</a>. It allows the contents of inventory slots to be exchanged, so that, among other things, items in players' hotbars are displayed automatically after logging in. <em>Minecraft</em> items can also store arbitrary metadata in a file format known as <a href="http://wiki.vg/NBT">Named Binary Tag (NBT)</a>, which allows complex data structures to be kept in hierarchical nests. Askar has released <a href="https://github.com/ammaraskar/pyCraft/tree/nbt_exploit">proof-of-concept attack code</a> he said exploits the vulnerability to crash any server hosting the game. Here's how it works.</p>
<blockquote>
<p>The vulnerability stems from the fact that the client is allowed to send the server information about certain slots. This, coupled with the NBT formats nesting allows us to <em>craft</em> a packet that is incredibly complex for the server to deserialize but trivial for us to generate.</p>
<p>In my case, I chose to create lists within lists, down to five levels. This is a json representation of what it looks like.</p>
<div class="highlight"> <pre><code class="language-javascript" data-lang="javascript"><span class="nx">rekt</span><span class="o">:</span> <span class="p">{</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="p">]</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="p">]</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="p">]</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">}</span></code></pre> </div>
<p>The root of the object, <code>rekt</code>, contains 300 lists. Each list has a list with 10 sublists, and each of those sublists has 10 of their own, up until 5 levels of recursion. Thats a total of <code>10^5 * 300 = 30,000,000</code> lists.</p>
<p>And this isnt even the theoretical maximum for this attack. Just the nbt data for this payload is 26.6 megabytes. But luckily Minecraft implements a way to compress large packets, lucky us! zlib shrinks down our evil data to a mere 39 kilobytes.</p>
<p>Note: in previous versions of Minecraft, there was no protocol wide compression for big packets. Previously, NBT was sent compressed with gzip and prefixed with a signed short of its length, which reduced our maximum payload size to <code>2^15 - 1</code>. Now that the length is a varint capable of storing integers up to <code>2^28</code>, our potential for attack has increased significantly.</p>
<p>When the server will decompress our data, itll have 27 megs in a buffer somewhere in memory, but that isnt the bit thatll kill it. When it attempts to parse it into NBT, itll create java representations of the objects meaning suddenly, the sever is having to create several million java objects including ArrayLists. This runs the server out of memory and causes tremendous CPU load.</p>
<p>This vulnerability exists on almost all previous and current Minecraft versions as of 1.8.3, the packets used as attack vectors are the <a href="http://wiki.vg/Protocol#Player_Block_Placement">0x08: Block Placement Packet</a> and <a href="http://wiki.vg/Protocol#Creative_Inventory_Action">0x10: Creative Inventory Action</a>.</p>
<p>The fix for this vulnerability isnt exactly that hard, the client should never really send a data structure as complex as NBT of arbitrary size and if it must, some form of recursion and size limits should be implemented.</p>
<p>These were the fixes that I recommended to Mojang 2 years ago.</p>
</blockquote>
<p>Ars is asking Mojang for comment and will update this post if company officials respond.</p>
</div>
<div class="article-expander">
<p><a href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/">Expand full story</a></p>
</div>
</section>
<div id="article-footer-wrap">
<section id="comments-area">
<a name="comments-bar"></a>
<div class="comments-bar"> <a class="subheading comments-read-link" href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?comments=1"><span class="text">Reader comments</span> <span class="comment-count"><span proptype="">75</span></span></a> </div>
<div id="comments-container"></div>
<div id="comments-posting-container" class="thick-divide-bottom">
<p id="reply">You must <a href="/civis/ucp.php?mode=login" class="vote_login">login or create an account</a> to comment.</p>
</div>
</section>
<aside class="thin-divide-bottom">
<ul class="share-buttons">
<li class="share-facebook">
<a href="https://www.facebook.com/sharer.php?u=http%3A%2F%2Farstechnica.com%2Fsecurity%2F2015%2F04%2F16%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F" target="_blank" data-dialog="400:368"> <span class="share-text">Share</span>
<div class="share-count-container">
<div class="share-count">-</div>
</div>
</a>
</li>
<li class="share-twitter">
<a href="https://twitter.com/share?text=Just-released+Minecraft+exploit+makes+it+easy+to+crash+game+servers&amp;url=http%3A%2F%2Fars.to%2F1CSWnf5" target="_blank" data-dialog="364:250"> <span class="share-text">Tweet</span>
<div class="share-count-container">
<div class="share-count">-</div>
</div>
</a>
</li>
<li class="share-google">
<a href="https://plus.google.com/share?url=http%3A%2F%2Farstechnica.com%2Fsecurity%2F2015%2F04%2F16%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F" target="_blank" data-dialog="485:600"> <span class="share-text">Google</span>
<div class="share-count-container">
<div class="share-count">-</div>
</div>
</a>
</li>
<li class="share-reddit">
<a href="https://www.reddit.com/submit?url=http%3A%2F%2Farstechnica.com%2Fsecurity%2F2015%2F04%2F16%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F&amp;title=Just-released+Minecraft+exploit+makes+it+easy+to+crash+game+servers" target="_blank"> <span class="share-text">Reddit</span>
<div class="share-count-container">
<div class="share-count">-</div>
</div>
</a>
</li>
</ul>
</aside>
<!-- cache hit 459:single/author:ec67ae7d8397f22698e2822e36453902 -->
<section class="article-author clearfix-redux">
<a href="/author/dan-goodin"><img width="47" height="47" src="http://cdn.arstechnica.net/wp-content/uploads/authors/Dan-Goodin-sq.jpg"/></a>
<p><a href="/author/dan-goodin" class="author-name">Dan Goodin</a> / Dan is the Security Editor at Ars Technica, which he joined in 2012 after working for The Register, the Associated Press, Bloomberg News, and other publications.</p>
</section>
<table class="post-links thick-divide-top thin-divide-bottom clearfix-redux" cellspacing="0" cellpadding="0" border="0" width="100%">
<tbody>
<tr>
<td width="50%" class="subheading older"> <a href="http://arstechnica.com/tech-policy/2015/04/16/dozens-of-us-government-online-whistleblower-sites-not-secured-by-https/" rel="prev"><span class="arrow"></span> Older Story</a> </td>
<td class="subheading newer"> <a href="http://arstechnica.com/gaming/2015/04/16/hidden-files-suggest-street-fighters-ryu-may-come-to-smash-bros/" rel="next">Newer Story <span class="arrow"></span></a> </td>
</tr>
</tbody>
</table>
<footer id="article-footer">
<div id="instream300x255_frame"></div>
<script type="text/javascript">
ars.AD.queue.push(['instream', {
sz: '300x255',
kws: ["blogvertorial", "inStream"],
collapse: true
}]);
</script>
<h2 class="subheading notched">You May Also Like</h2>
<ul id="recommendations" class="clearfix-redux">
<!-- cache miss 459:single/sponsored-recs:885d56085f538f75cf5e9bcb302f399f -->
</ul>
</footer>
</div>
</article>
<section id="article-sidebar" class="column-1 right">
<!-- cache hit 459:column/article-bottom:b3538aec37c1a165d2b4b62bd58e56e3 -->
<aside class="side-ad thick-divide-bottom">
<div id="xrailTop300x250_frame"></div>
<script type="text/javascript">
ars.AD.queue.push(['xrailTop', {
sz: '300x250',
kws: ["top"],
collapse: true
}]);
</script>
</aside>
<h2 id="recent-featured-title" class="subheading notched">Latest Feature Story</h2>
<ul class="column">
<li class="post" id="post-658367">
<article class="in-column" data-post-id="658367">
<a href="http://arstechnica.com/apple/2015/05/03/review-the-absolutely-optional-apple-watch-and-watch-os-1-0/" class="headline-image"> <img src="http://cdn.arstechnica.net/wp-content/uploads/2015/05/DSC00588-300x100.jpg" width="300" height="100"/>
<h2 class="page-count subheading">Feature Story (7 pages)</h2>
<h1 class="heading">Review: The absolutely optional Apple Watch and Watch OS 1.0</h1> </a>
<p class="excerpt">A pragmatist's guide to a nice but not quite necessary gadget.</p>
</article>
</li>
<aside class="thick-divide-top">
<h2 class="subheading notched">Watch Ars Video</h2>
<article class="in-column">
<div class="column-video" data-player="2196096102001" data-video="4202322531001" data-key="AQ~~,AAAAlDCBGhk~,VcmqiTAuekrwPweJ20LLt7jwm8LxmhCE"> <img src="https://cdn.arstechnica.net/i/http://brightcove.vo.llnwd.net/v1/unsecured/media/636468927001/201504/641/636468927001_4202397654001_LG-G4.jpg?pubId=636468927001" width="300" height="169"/>
<div class="column-video-overlay"></div>
</div> <a href="http://arstechnica.com/gadgets/2015/04/28/hands-on-with-the-leather-backed-lg-g4/"><h1 class="heading videohead">Hands-on with the New LG G4</h1></a>
<p class="excerpt">LG goes with a wild rear design and a Snapdragon 808.</p>
</article>
</aside>
<li id="in-the-know">
<h2 class="subheading notched">Stay in the know with</h2>
<ul class="social clearfix">
<li class="fb">
<a href="https://www.facebook.com/arstechnica"></a>
</li>
<li class="twit">
<a href="https://twitter.com/arstechnica"></a>
</li>
<li class="gplus">
<a href="https://plus.google.com/+ArsTechnica/posts"></a>
</li>
<li class="email">
<a href="http://arstechnica.us1.list-manage.com/subscribe?u=af7f013bad7e785d15aab736f&amp;id=0adf3ee3d9"></a>
</li>
<li class="rss">
<a href="/rss-feeds/"></a>
</li>
</ul>
</li>
<h2 class="subheading notched">Latest News</h2>
<ol class="rail latest-stories">
<li>
<a href="http://arstechnica.com/information-technology/2015/05/04/microsoft-bangs-the-cybersecurity-drum-with-advanced-threat-analytics/">
<h2>Protecting networks from your own employees</h2>
<h1 class="heading">Microsoft bangs the cybersecurity drum with Advanced Threat Analytics</h1> </a>
</li>
<li>
<a href="http://arstechnica.com/information-technology/2015/05/04/windows-update-for-business-brings-windows-updates-to-your-business/">
<h2>WUB WUB WUB WUB</h2>
<h1 class="heading">Windows Update for Business brings Windows updates to your business</h1> </a>
</li>
<li>
<a href="http://arstechnica.com/security/2015/05/04/super-secretive-malware-wipes-hard-drive-to-prevent-analysis/">
<h2>INITIATE SELF-DESTRUCT SEQUENCE</h2>
<h1 class="heading">Super secretive malware wipes hard drive to prevent analysis</h1> </a>
</li>
<li>
<a href="http://arstechnica.com/gaming/2015/05/04/failed-christian-shoe-promoter-makes-anti-gay-first-person-shooter/"> <img src="http://cdn.arstechnica.net/wp-content/uploads/2015/05/Screen-Shot-2015-05-04-at-2.45.22-PM-150x150.png" width="50" height="50"/>
<h1 class="heading">Failed Christian shoe promoter makes anti-gay first-person shooter</h1> </a>
</li>
<li>
<a href="http://arstechnica.com/information-technology/2015/05/04/prime-minister-of-singapore-shares-his-c-code-for-sudoku-solver/">
<h2>#include stdio.h</h2>
<h1 class="heading">Prime Minister of Singapore shares his C++ code for Sudoku solver</h1> </a>
</li>
<li>
<a href="http://arstechnica.com/tech-policy/2015/05/04/9th-circuit-judges-rip-into-prenda-law-copyright-trolling-scheme/"> <img src="http://cdn.arstechnica.net/wp-content/uploads/2015/05/Screen-Shot-2015-05-04-at-11.09.05-AM-150x150.png" width="50" height="50"/>
<h1 class="heading">9th Circuit judges rip into Prenda law copyright trolling scheme</h1> </a>
</li>
<div style="display:none">
<div id="polar195x130_frame"></div>
<script type="text/javascript">
ars.AD.queue.push(['polar', {
sz: '195x130',
kws: [],
collapse: true
}]);
</script>
</div>
</ol>
<li>
<aside class="side-ad">
<div id="xrailBottom300x250_frame"></div>
<script type="text/javascript">
ars.AD.queue.push(['xrailBottom', {
sz: '300x250',
kws: ["bottom"],
collapse: true
}]);
</script>
</aside>
</li>
<li class="thick-divide-top thick-divide-bottom">
<aside class="side-ad">
<script type="text/javascript" language="JavaScript">
// <![CDATA[
google_ad_client = 'ca-conde_arstechnica';
google_ad_channel = 'ars_technica_standard_a';
google_language = 'en';
google_ad_width = '300';
google_ad_height = '250';
google_ad_type = 'text';
google_encoding = 'utf8';
google_safe = 'high';
google_adtest = 'off';
google_ad_format = '';
google_ad_section = 'default';
// ]]>
</script>
<script type="text/javascript" src="//pagead2.googlesyndication.com/pagead/show_ads.js" language="JavaScript"></script>
</aside>
</li>
</ul>
</section>
</section>
<footer id="page-footer">
<nav id="footer-nav" class="clearfix">
<div class="nav-section">
<h2 class="subheading">Site Links</h2>
<ul>
<li><a href="/about-us/">About Us</a></li>
<li><a href="/advertise-with-us/">Advertise with us</a></li>
<li><a href="/contact-us/">Contact Us</a></li>
<li><a href="/reprints/">Reprints</a></li>
</ul>
<h2 class="subheading">Subscriptions</h2>
<ul>
<li><a href="/subscriptions/">Subscribe to Ars</a></li>
</ul>
</div>
<div class="nav-section">
<h2 class="subheading">More Reading</h2>
<ul>
<li><a href="/rss-feeds/">RSS Feeds</a></li>
<li><a href="/newsletters/">Newsletters</a></li>
</ul>
</div>
<div class="nav-section">
<h2 class="subheading">Conde Nast Sites</h2>
<ul class="conde-nast-sites">
<li><a href="http://www.reddit.com/">Reddit</a></li>
<li><a href="http://www.wired.com/">Wired</a></li>
<li><a href="http://www.vanityfair.com/">Vanity Fair</a></li>
<li><a href="http://www.style.com/">Style</a></li>
<li><a href="http://www.details.com/">Details</a></li>
</ul>
<form method="get" action="#">
<select id="mag_list" name="mag_list" onchange="(this.options[this.selectedIndex].value) ? (window.location = this.options[this.selectedIndex].value) : null">
<option value="" selected="selected">Visit our sister sites</option>
<option value="">- - - - - - - - - - - - - -</option>
<option value="http://www.gq.com">GQ</option>
<option value="http://www.concierge.com">Concierge</option>
<option value="http://www.epicurious.com">Epicurious</option>
<option value="http://men.style.com">Men.Style.com</option>
<option value="http://www.style.com">Style.com</option>
<option value="http://www.wired.com">Wired.com</option>
<option value="http://www.lipstick.com">Lipstick.com</option>
<option value="http://www.nutritiondata.com">NutritionData</option>
<option value="http://www.allure.com">Allure</option>
<option value="http://www.architecturaldigest.com">Architectural Digest</option>
<option value="http://www.bonappetit.com">Bon Appétit</option>
<option value="http://www.brides.com">Brides</option>
<option value="http://www.portfolio.com">Condé Nast Portfolio</option>
<option value="http://www.glamour.com">Glamour</option>
<option value="http://www.golfdigest.com">Golf Digest</option>
<option value="http://www.golfworld.com">Golf World</option>
<option value="http://www.luckymag.com">Lucky</option>
<option value="http://www.self.com">Self</option>
<option value="http://www.teenvogue.com">Teen Vogue</option>
<option value="http://www.newyorker.com">The New Yorker</option>
<option value="http://www.vanityfair.com">Vanity Fair</option>
<option value="http://www.wmagazine.com">W</option>
</select>
</form>
<form method="get" action="#">
<select size="1" id="sub_list" name="sub_list" onchange="(this.options[this.selectedIndex].value) ? (window.location = this.options[this.selectedIndex].value) : null">
<option value="" selected="selected">Subscribe to a magazine</option>
<option value="http://www.magazinestoresubscriptions.com?source=univdropdown">View All Titles</option>
<option value="">- - - - - - - - - - - - - -</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_Allure?source=SITEFOOTER">Allure</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_ArchitecturalDigest">Architectural Digest</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_BonAppetite?source=SITEFOOTER">Bon Appétit</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_Brides?source=SITEFOOTER">Brides</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_CondeNastPortfolio?source=SITEFOOTER">Condé Nast Portfolio</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_CondeNastTraveler?source=SITEFOOTER">Condé Nast Traveler</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_Details?source=SITEFOOTER">Details</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_ElegantBride?source=SITEFOOTER">Elegant Bride</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_Glamour?source=SITEFOOTER">Glamour</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_GolfDigest?source=SITEFOOTER">Golf Digest</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_GolfWorld?source=SITEFOOTER">Golf World</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_GQ?source=SITEFOOTER">GQ</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_Lucky?source=SITEFOOTER">Lucky</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_ModernBride?source=SITEFOOTER">Modern Bride</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_Self?source=SITEFOOTER">Self</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_TeenVogue?source=SITEFOOTER">Teen Vogue</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_NewYorker?source=SITEFOOTER">The New Yorker</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_VanityFair?source=SITEFOOTER">Vanity Fair</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_Vogue?source=SITEFOOTER">Vogue</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_W?source=SITEFOOTER">W</option>
<option value="https://www.magazinestoresubscriptions.com/webapp/wcs/stores/servlet/Subscriptions_Wired?source=SITEFOOTER">Wired</option>
</select>
</form>
</div>
<div class="nav-section" id="mobile-site">
<h2 class="subheading"><a href="http://arstechnica.com/security/2015/04/16/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?view=mobile">View Mobile Site</a></h2> </div>
</nav>
<p style="text-align:center;margin-top:30px;margin-bottom:0">
<a href="http://condenast.com"><img src="http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/images/condenast-logo.png" width="131" height="19"/></a>
</p>
<div id="copyright-terms"> © 2015 Condé Nast. All rights reserved
<br/> Use of this Site constitutes acceptance of our <a href="http://www.condenast.com/privacy-policy" target="_blank">User Agreement</a> (effective 1/2/14) and <a href="http://www.condenast.com/privacy-policy#privacypolicy" target="_blank">Privacy Policy</a> (effective 1/2/14), and <a href="/amendment-to-conde-nast-user-agreement-privacy-policy/">Ars Technica Addendum (effective 5/17/2012)</a>
<br/> <a href="http://www.condenast.com/privacy-policy#privacypolicy-california" target="_blank">Your California Privacy Rights</a>
<br/> The material on this site may not be reproduced, distributed, transmitted, cached or otherwise used, except with the prior written permission of Condé Nast.
<br/>
<br/> <a href="http://www.condenast.com/privacy-policy#privacypolicy-optout" target="_blank">Ad Choices</a><img width="10" height="10" border="0" src="http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/images/ad_choices_arrow.png"/> </div>
</footer>
</div>
<script type="text/javascript" src="//www.google.com/jsapi?autoload={'modules':[{'name':'search','version':'1','packages':[],'language':'en'}]}"></script>
<script type="text/javascript" src="http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/js/ars.min.4963c9cfd2e7a5799f3b8c40325988b4.js"></script>
<!-- what the christ -->
<script type="text/javascript" src="//www.googletagservices.com/tag/js/gpt.js"></script>
<script type="text/javascript" src="http://cdn.arstechnica.net/ads/js/cn.dart.bun.min.js"></script>
<script type="text/javascript">
(function() {
if ("CN" in window) {
if (ars.MOBILE && "UAParser" in window) {
var ua = new UAParser();
if (ua.getOS().name == "Android" && ua.getBrowser().name == "Chrome") {
return;
}
}
CN.site.init({
code: "ars",
title: "Ars",
name: ars.MOBILE ? "ars.mobile" : "ars",
env: ars.ENV === "production" ? "PROD" : "DEV",
debug: ars.ENV !== "production"
});
CN.dart.init({
site: CN.site.name + '.dart',
zone: ars.AD.zone,
kws: ars.AD.kw,
gptCallback: function(e) {
ars.sda.ad_loaded(e);
}
});
CN.dart.getCommon()["domDelay"]["defaultVal"] = 100;
for (var i = 0; i &lt; ars.AD.queue.length; i++) {
var ad = ars.AD.queue[i],
id = ad[0],
args = ad[1];
if ($('#' + id + args.sz + "_frame").length) CN.dart.call(id, args);
}
ars.AD.queue = [];
}
})();
</script>
<script type="text/javascript">
CN.ad.polar.article = function(Handlebars, depth0, helpers, partials, data) {
this.compilerInfo = [4, '>= 1.0.0'];
helpers = this.merge(helpers, Handlebars.helpers);
data = data || {};
var buffer = "",
stack1, stack2, functionType = "function",
escapeExpression = this.escapeExpression,
self = this;
function program1(depth0, data) {
var buffer = "",
stack1;
buffer += "\n &lt;span style=\"width:50px; height:50px; overflow:hidden; display:inline-block; float:left; margin:2px 10px 5px 0\">\n &lt;img src=\"" + escapeExpression(((stack1 = ((stack1 = depth0.image), stack1 == null || stack1 === false ? stack1 : stack1.href)), typeof stack1 === functionType ? stack1.apply(depth0) : stack1)) + "\" style=\"float:none; margin:0; height:50px; width:auto;\" />\n &lt;/span>\n ";
return buffer;
}
buffer += "&lt;li>\n &lt;a href=\"";
if (stack1 = helpers.link) {
stack1 = stack1.call(depth0, {
hash: {},
data: data
});
} else {
stack1 = depth0.link;
stack1 = typeof stack1 === functionType ? stack1.apply(depth0) : stack1;
}
buffer += escapeExpression(stack1) + "\">\n &lt;h2 style=\"color:#00A3D3;\">Sponsored by: &lt;span style=\"text-transform:none;\">" + escapeExpression(((stack1 = ((stack1 = depth0.sponsor), stack1 == null || stack1 === false ? stack1 : stack1.name)), typeof stack1 === functionType ? stack1.apply(depth0) : stack1)) + "&lt;/span>&lt;/h2>\n ";
stack2 = helpers['if'].call(depth0, ((stack1 = depth0.image), stack1 == null || stack1 === false ? stack1 : stack1.href), {
hash: {},
inverse: self.noop,
fn: self.program(1, program1, data),
data: data
});
if (stack2 || stack2 === 0) {
buffer += stack2;
}
buffer += "\n &lt;h1 class=\"heading\">";
if (stack2 = helpers.title) {
stack2 = stack2.call(depth0, {
hash: {},
data: data
});
} else {
stack2 = depth0.title;
stack2 = typeof stack2 === functionType ? stack2.apply(depth0) : stack2;
}
buffer += escapeExpression(stack2) + "&lt;/h1>\n &lt;/a>\n&lt;/li>";
return buffer;
};
</script>
<!-- cache hit 459:single/javascript-footer:1ad28a5dc0a24868be6b031b5fdecb2e -->
<noscript>
<a href="http://www.omniture.com" title="Web Analytics"><img src="http://condenast.112.2o7.net/b/ss/condenet-dev/1/H.15.1--NS/0" height="1" width="1" border="0" alt="" /></a>
</noscript>
<!-- Google Analytics start -->
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(
['_setAccount', 'UA-31997-1'], ['_setCustomVar', 1, 'view', "grid"], ['_setCustomVar', 2, 'theme', "light"], ['_setCustomVar', 3, 'logged_in', "false"], ['_setCustomVar', 4, 'show_comments', "false"], ['_setCustomVar', 5, 'is_premier', "false"], ['_trackPageview']);
(function() {
var ga = document.createElement('script');
ga.type = 'text/javascript';
ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(ga, s);
})();
</script>
<!-- Google Analytics end -->
<!-- Parse.ly start -->
<script type="text/javascript">
(function(d) {
var site = "arstechnica.com",
b = d.body,
e = d.createElement("div");
e.innerHTML = '<span id="parsely-cfg" data-parsely-site="' + site + '"></span>';
e.id = "parsely-root";
e.style.display = "none";
b.appendChild(e);
})(document);
(function(s, p, d) {
var h = d.location.protocol,
i = p + "-" + s,
e = d.getElementById(i),
r = d.getElementById(p + "-root"),
u = h === "https:" ? "d1z2jf7jlzjs58.cloudfront.net" : "static." + p + ".com";
if (e) return;
e = d.createElement(s);
e.id = i;
e.async = true;
e.src = h + "//" + u + "/p.js";
r.appendChild(e);
})("script", "parsely", document);
</script>
<!-- Parse.ly end -->
</body>
</html>

@ -0,0 +1,6 @@
{
"0": "http:\/\/fakehost\/test\/base\/foo\/bar\/baz.png",
"2": "http:\/\/fakehost\/foo\/bar\/baz.png",
"3": "http:\/\/test\/foo\/bar\/baz.png",
"4": "https:\/\/test\/foo\/bar\/baz.png"
}

@ -0,0 +1,8 @@
{
"Author": null,
"Direction": null,
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
"Image": null,
"Title": "Base URL with base relative test",
"SiteName": null
}

@ -0,0 +1,33 @@
<article>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>Links</p>
<p><a href="http://fakehost/test/base/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/test/base/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="http://fakehost/test/base/baz.html#foo">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="http://fakehost/test/base/foo/bar/baz.png"></img></p>
<p><img src="http://fakehost/test/base/foo/bar/baz.png"></img></p>
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
<p><img src="http://test/foo/bar/baz.png"></img></p>
<p><img src="https://test/foo/bar/baz.png"></img></p>
<h2>Foo</h2>
<p>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

@ -0,0 +1,44 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<base href="base/"/>
<title>Base URL with base relative test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<p>Links</p>
<p><a href="foo/bar/baz.html">link</a></p>
<p><a href="./foo/bar/baz.html">link</a></p>
<p><a href="/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="baz.html#foo">link</a></p>
<p><a href="/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="foo/bar/baz.png"/></p>
<p><img src="./foo/bar/baz.png"/></p>
<p><img src="/foo/bar/baz.png"/></p>
<p><img src="http://test/foo/bar/baz.png"/></p>
<p><img src="https://test/foo/bar/baz.png"/></p>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

@ -0,0 +1,5 @@
{
"0": "http:\/\/fakehost\/foo\/bar\/baz.png",
"3": "http:\/\/test\/foo\/bar\/baz.png",
"4": "https:\/\/test\/foo\/bar\/baz.png"
}

@ -0,0 +1,8 @@
{
"Author": null,
"Direction": null,
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
"Image": null,
"Title": "Base URL with base test",
"SiteName": null
}

@ -0,0 +1,33 @@
<article>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>Links</p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="http://fakehost/baz.html#foo">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
<p><img src="http://test/foo/bar/baz.png"></img></p>
<p><img src="https://test/foo/bar/baz.png"></img></p>
<h2>Foo</h2>
<p>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

@ -0,0 +1,44 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<base href="/"/>
<title>Base URL with base test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<p>Links</p>
<p><a href="foo/bar/baz.html">link</a></p>
<p><a href="./foo/bar/baz.html">link</a></p>
<p><a href="/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="baz.html#foo">link</a></p>
<p><a href="/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="foo/bar/baz.png"/></p>
<p><img src="./foo/bar/baz.png"/></p>
<p><img src="/foo/bar/baz.png"/></p>
<p><img src="http://test/foo/bar/baz.png"/></p>
<p><img src="https://test/foo/bar/baz.png"/></p>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

@ -0,0 +1,6 @@
{
"0": "http:\/\/fakehost\/test\/foo\/bar\/baz.png",
"2": "http:\/\/fakehost\/foo\/bar\/baz.png",
"3": "http:\/\/test\/foo\/bar\/baz.png",
"4": "https:\/\/test\/foo\/bar\/baz.png"
}

@ -0,0 +1,8 @@
{
"Author": null,
"Direction": null,
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
"Image": null,
"Title": "Base URL test",
"SiteName": null
}

@ -0,0 +1,33 @@
<article>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>Links</p>
<p><a href="http://fakehost/test/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/test/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="http://fakehost/test/baz.html#foo">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="http://fakehost/test/foo/bar/baz.png"></img></p>
<p><img src="http://fakehost/test/foo/bar/baz.png"></img></p>
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
<p><img src="http://test/foo/bar/baz.png"></img></p>
<p><img src="https://test/foo/bar/baz.png"></img></p>
<h2>Foo</h2>
<p>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

@ -0,0 +1,43 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Base URL test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<p>Links</p>
<p><a href="foo/bar/baz.html">link</a></p>
<p><a href="./foo/bar/baz.html">link</a></p>
<p><a href="/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="baz.html#foo">link</a></p>
<p><a href="/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="foo/bar/baz.png"/></p>
<p><img src="./foo/bar/baz.png"/></p>
<p><img src="/foo/bar/baz.png"/></p>
<p><img src="http://test/foo/bar/baz.png"/></p>
<p><img src="https://test/foo/bar/baz.png"/></p>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

@ -0,0 +1,8 @@
{
"Author": null,
"Direction": null,
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua.",
"Image": null,
"Title": "Basic tag cleaning test",
"SiteName": null
}

@ -0,0 +1,18 @@
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div><div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>

@ -0,0 +1,36 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Basic tag cleaning test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<iframe src="about:blank">Iframe fallback test</iframe>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<h2>Foo</h2>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<object data="foo.swf" type="application/x-shockwave-flash" width="88" height="31">
<param movie="foo.swf" />
</object>
<embed src="foo.swf"/>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</article>
</body>
</html>

@ -0,0 +1,7 @@
{
"0": "http:\/\/ichef.bbci.co.uk\/news\/1024\/cpsprodpb\/3D8B\/production\/_84455751_84455749.jpg",
"1": "http:\/\/ichef.bbci.co.uk\/news\/555\/cpsprodpb\/462D\/production\/_84456971_gettyimages-167501087.jpg",
"2": "http:\/\/ichef.bbci.co.uk\/news\/555\/media\/images\/76020000\/jpg\/_76020974_line976.jpg",
"3": "http:\/\/ichef-1.bbci.co.uk\/news\/555\/cpsprodpb\/6D3D\/production\/_84456972_p072315al-0500.jpg",
"5": "http:\/\/ichef-1.bbci.co.uk\/news\/555\/cpsprodpb\/142FD\/production\/_84458628_shirtreuters.jpg"
}

@ -0,0 +1,8 @@
{
"Author": null,
"Direction": null,
"Excerpt": "President Barack Obama tells the BBC his failure to pass \"common sense gun safety laws\" is the greatest frustration of his presidency.",
"Image": "http:\/\/ichef.bbci.co.uk\/news\/1024\/cpsprodpb\/3D8B\/production\/_84455751_84455749.jpg",
"Title": "Obama admits US gun laws are his 'biggest frustration' - BBC News",
"SiteName": "BBC News"
}

@ -0,0 +1,17 @@
<div property="articleBody">
<p>President Barack Obama has admitted that his failure to pass "common sense gun safety laws" in the US is the greatest frustration of his presidency. </p><p>In an interview with the BBC, Mr Obama said it was "distressing" not to have made progress on the issue "even in the face of repeated mass killings".</p><p>He vowed to keep trying, but the BBC's North America editor Jon Sopel said the president did not sound very confident. </p><p>However, Mr Obama said race relations had improved during his presidency. </p><p>Hours after the interview, a gunman opened fire at a cinema in the US state of Louisiana, killing two people and injuring several others before shooting himself.</p><p>In a wide-ranging interview, President Obama also said:</p><ul><li>
<a href="http://www.bbc.co.uk/news/uk-politics-33647154">The UK must stay in the EU</a> to have influence on the world stage</li>
<li>He is confident the Iran nuclear deal will be passed by Congress </li>
<li>Syria needs a political solution in order to defeat the Islamic State group</li>
<li>He would speak "bluntly" against corruption <a href="http://www.bbc.co.uk/news/world-us-canada-33646563">and human rights violations in Kenya</a>
</li>
<li>He would defend his advocacy of gay rights following protests in Kenya</li>
<li>Despite racial tensions, the US is becoming more diverse and more tolerant</li>
</ul><p><a href="http://www.bbc.co.uk/news/world-us-canada-33646542">Read the full transcript of his interview</a></p><p>Mr Obama lands in Kenya later on Friday for his first visit since becoming president. </p><p>But with just 18 months left in power, he said gun control was the area where he has been "most frustrated and most stymied" since coming to power in 2009.</p><p>"If you look at the number of Americans killed since 9/11 by terrorism, it's less than 100. If you look at the number that have been killed by gun violence, it's in the tens of thousands," Mr Obama said. </p><figure><img alt="Gun control campaigners protest in McPhearson Square in Washington DC - 25 April 2013" datasrc="http://ichef.bbci.co.uk/news/976/cpsprodpb/462D/production/_84456971_gettyimages-167501087.jpg" height="549" src="http://ichef.bbci.co.uk/news/555/cpsprodpb/462D/production/_84456971_gettyimages-167501087.jpg" width="976"></img><figcaption><span>
The president said he would continue fighting for greater gun control laws
</span>
</figcaption></figure><p>"For us not to be able to resolve that issue has been something that is distressing," he added. </p><p>Mr Obama has pushed for stricter gun control throughout his presidency but has been unable to secure any significant changes to the laws. </p><p>After nine African-American churchgoers were killed in South Carolina in June, he admitted "politics in this town" meant there were few options available.</p><figure><img alt="line" datasrc="http://ichef.bbci.co.uk/news/464/media/images/76020000/jpg/_76020974_line976.jpg" height="2" src="http://ichef.bbci.co.uk/news/555/media/images/76020000/jpg/_76020974_line976.jpg" width="464"></img></figure><h2>Analysis: Jon Sopel, BBC News, Washington</h2><figure><img alt="President Barack Obama participates in an interview with Jon Sopel of BBC in the Roosevelt Room of the White House - 23 July 2015" datasrc="http://ichef-1.bbci.co.uk/news/976/cpsprodpb/6D3D/production/_84456972_p072315al-0500.jpg" height="549" src="http://ichef-1.bbci.co.uk/news/555/cpsprodpb/6D3D/production/_84456972_p072315al-0500.jpg" width="976"></img></figure><p>Nine months ago, the president seemed like a spent force, after taking a beating in the midterm elections, during which members of his own party were reluctant to campaign on his record. </p><p>But the man sat before me today was relaxed and confident, buoyed by a string of "wins" on healthcare, Cuba and Iran, after bitter and ongoing battles with his many critics. </p><p>The only body swerve the president performed was when I asked him <a href="http://www.bbc.co.uk/news/world-us-canada-33643168"> how many minds he had changed on the Iran nuclear deal </a>after an intense sell aimed at Gulf allies and members of US Congress who remain implacably opposed. </p><p>There was a momentary flicker across the president's face as if to say "You think you got me?" 
before his smile returned and he proceeded to talk about how Congress would come round.</p><p>But notably, he did not give a direct answer to that question, which leaves me with the impression that he has persuaded precisely zero.</p><p><a href="http://www.bbc.co.uk/news/world-us-canada-33646875">Five things we learned from Obama interview</a></p><p><a href="http://www.bbc.co.uk/news/world-us-canada-33646545">The presidential body swerve</a></p><figure><img alt="line" datasrc="http://ichef.bbci.co.uk/news/464/media/images/76020000/jpg/_76020974_line976.jpg" height="2" src="http://ichef.bbci.co.uk/news/555/media/images/76020000/jpg/_76020974_line976.jpg" width="464"></img></figure><p>On race relations, Mr Obama said recent concerns around policing and mass incarcerations were "legitimate and deserve intense attention" but insisted progress had been made. </p><p>Children growing up during the eight years of his presidency "will have a different view of race relations in this country and what's possible," he said. </p><p>"There are going to be tensions that arise. But if you look at my daughters' generation, they have an attitude about race that's entirely different than even my generation."</p><p>Talking about how he was feeling after his recent successes, he said "every president, every leader has strengths and weaknesses". </p><p>"One of my strengths is I have a pretty even temperament. I don't get too high when it's high and I don't get too low when it's low," he said. </p><figure><img alt="Customer looks at Obama shirts at a stall in Nairobi's Kibera slums, 23 July 2015" datasrc="http://ichef-1.bbci.co.uk/news/976/cpsprodpb/142FD/production/_84458628_shirtreuters.jpg" height="549" src="http://ichef-1.bbci.co.uk/news/555/cpsprodpb/142FD/production/_84458628_shirtreuters.jpg" width="976"></img><figcaption><span>
Kenya is getting ready to welcome the US president
</span>
</figcaption></figure><h2>Kenya trip</h2><p>Mr Obama was speaking to the BBC at the White House before departing for Kenya.</p><p>His father was Kenyan and the president is expected to meet relatives in Nairobi.</p><p>Mr Obama has faced criticism in the country after the US legalised gay marriage. However, in his interview, the president said he would not fall silent on the issue.</p><p>"I am not a fan of discrimination and bullying of anybody on the basis of race, on the basis of religion, on the basis of sexual orientation or gender," he said.</p><p>The president also admitted that some African governments, including Kenya's, needed to improve their records on human rights and democracy. However, he defended his decision to engage with and visit those governments. </p><p>"Well, they're not ideal institutions. But what we found is, is that when we combined blunt talk with engagement, that gives us the best opportunity to influence and open up space for civil society." </p><p>Mr Obama will become the first US president to address the African Union when he travels on to Ethiopia on Sunday.</p>
</div>

File diff suppressed because one or more lines are too long

@ -0,0 +1,6 @@
[
"https:\/\/1.bp.blogspot.com\/-YIPC5jkXkDE\/Vy7YPSqFKWI\/AAAAAAAAAxI\/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB\/w1200-h630-p-nu\/block-diagram.png",
"https:\/\/1.bp.blogspot.com\/-YIPC5jkXkDE\/Vy7YPSqFKWI\/AAAAAAAAAxI\/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB\/s640\/block-diagram.png",
"https:\/\/1.bp.blogspot.com\/-k3naUT3uXao\/Vy7WFac246I\/AAAAAAAAAw8\/mePy_ostO8QJra5ZJrbP2WGhTlJ0B_r8gCLcB\/s640\/schematic-from-hell.png",
"https:\/\/2.bp.blogspot.com\/-kIekczO693g\/Vy7dBqYifXI\/AAAAAAAAAxc\/hMNJBs5bedIQOrBzzkhq4gbmhR-n58EQwCLcB\/s400\/graph-labels.png"
]

@ -0,0 +1,8 @@
{
"Author": null,
"Direction": null,
"Excerpt": "I've written a couple of posts in the past few months but they were all for the blog at work so I figured I'm long overdue for one on Silic...",
"Image": "https:\/\/1.bp.blogspot.com\/-YIPC5jkXkDE\/Vy7YPSqFKWI\/AAAAAAAAAxI\/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB\/w1200-h630-p-nu\/block-diagram.png",
"Title": "Open Verilog flow for Silego GreenPak4 programmable logic devices",
"SiteName": null
}

@ -0,0 +1,57 @@
<div id="post-body-932306423056216142" itemprop="description articleBody"><p>
I've written a couple of posts in the past few months but they were all for <a href="http://blog.ioactive.com/search/label/Andrew%20Zonenberg">the blog at work</a> so I figured I'm long overdue for one on Silicon Exposed.</p><h2>
So what's a GreenPak?</h2>
<p> Silego Technology is a fabless semiconductor company located in the SF Bay area, which makes (among other things) a line of programmable logic devices known as GreenPak. Their <a href="http://www.silego.com/products/greenpak5.html">5th generation parts</a> were just announced, but I started this project before that happened so I'm still targeting the <a href="http://www.silego.com/products/greenpak4.html">4th generation</a>.</p><p> GreenPak devices are kind of like itty bitty <a href="http://www.cypress.com/products/32-bit-arm-cortex-m-psoc">PSoCs</a> - they have a mixed signal fabric with an ADC, DACs, comparators, voltage references, plus a digital LUT/FF fabric and some typical digital MCU peripherals like counters and oscillators (but no CPU).</p><p> It's actually an interesting architecture - FPGAs (including some devices marketed as CPLDs) are a 2D array of LUTs connected via wires to adjacent cells, and true (product term) CPLDs are a star topology of AND-OR arrays connected by a crossbar. GreenPak, on the other hand, is a star topology of LUTs, flipflops, and analog/digital hard IP connected to a crossbar.</p><p> Without further ado, here's a block diagram showing all the cool stuff you get in the SLG46620V:</p><table readabilityDataTable="0"><tbody><tr><td>
<a href="https://1.bp.blogspot.com/-YIPC5jkXkDE/Vy7YPSqFKWI/AAAAAAAAAxI/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB/s1600/block-diagram.png" imageanchor="1"><img height="512" src="https://1.bp.blogspot.com/-YIPC5jkXkDE/Vy7YPSqFKWI/AAAAAAAAAxI/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB/s640/block-diagram.png" width="640"></img></a>
</td>
</tr><tr><td>SLG46620V block diagram (from device datasheet)</td>
</tr></tbody></table><p>
They're also tiny (the SLG46620V is a 20-pin 0.4mm pitch STQFN measuring 2x3 mm, and the lower gate count SLG46140V is a mere 1.6x2 mm) and probably the cheapest programmable logic device on the market - $0.50 in low volume and less than $0.40 in larger quantities.</p><p> The Vdd range of GreenPak4 is huge, more like what you'd expect from an MCU than an FPGA! It can run on anything from 1.8 to 5V, although performance is only specified at 1.8, 3.3, and 5V nominal voltages. There's also a dual-rail version that trades one of the GPIO pins for a second power supply pin, allowing you to interface to logic at two different voltage levels.</p><p> To support low-cost/space-constrained applications, they even have the configuration memory on die. It's one-time programmable and needs external Vpp to program (presumably Silego didn't want to waste die area on charge pumps that would only be used once) but has a SRAM programming mode for prototyping.</p><p> The best part is that the development software (GreenPak Designer) is free of charge and provided for all major operating systems including Linux! Unfortunately, the only supported design entry method is schematic entry and there's no way to write your design in a HDL.</p><p> While schematics may be fine for quick tinkering on really simple designs, they quickly get unwieldy. The nightmare of a circuit shown below is just a bunch of counters hooked up to LEDs that blink at various rates.</p><table readabilityDataTable="0"><tbody><tr><td>
<a href="https://1.bp.blogspot.com/-k3naUT3uXao/Vy7WFac246I/AAAAAAAAAw8/mePy_ostO8QJra5ZJrbP2WGhTlJ0B_r8gCLcB/s1600/schematic-from-hell.png" imageanchor="1"><img height="334" src="https://1.bp.blogspot.com/-k3naUT3uXao/Vy7WFac246I/AAAAAAAAAw8/mePy_ostO8QJra5ZJrbP2WGhTlJ0B_r8gCLcB/s640/schematic-from-hell.png" width="640"></img></a>
</td>
</tr><tr><td>Schematic from hell!</td>
</tr></tbody></table><p>
As if this wasn't enough of a problem, the largest GreenPak4 device (the SLG46620V) is split into two halves with limited routing between them, and the GUI doesn't help the user manage this complexity at all - you have to draw your schematic in two halves and add "cross connections" between them.</p><p> The icing on the cake is that schematics are a pain to diff and collaborate on. Although GreenPak schematics are XML based, which is a touch better than binary, who wants to read a giant XML diff and try to figure out what's going on in the circuit?</p><p> This isn't going to be a post on the quirks of Silego's software, though - that would be boring. As it turns out, there's one more exciting feature of these chips that I didn't mention earlier: the configuration bitstream is 100% documented in the device datasheet! This is unheard of in the programmable logic world. As Nick of Arachnid Labs <a href="http://www.arachnidlabs.com/blog/2015/03/30/greenpak/">says</a>, the chip is "just dying for someone to write a VHDL or Verilog compiler for it". As you can probably guess by from the title of this post, I've been busy doing exactly that.</p><h2>
Great! How does it work?</h2>
<p> Rather than wasting time writing a synthesizer, I decided to write a GreenPak technology library for Clifford Wolf's excellent open source synthesis tool, <a href="http://www.clifford.at/yosys/">Yosys</a>, and then make a place-and-route tool to turn that into a final netlist. The post-PAR netlist can then be loaded into GreenPak Designer in order to program the device.</p><p> The first step of the process is to run the "synth_greenpak4" Yosys flow on the Verilog source. This runs a generic RTL synthesis pass, then some coarse-grained extraction passes to infer shift register and counter cells from behavioral logic, and finally maps the remaining logic to LUT/FF cells and outputs a JSON-formatted netlist.</p><p> Once the design has been synthesized, my tool (named, surprisingly, gp4par) is then launched on the netlist. It begins by parsing the JSON and constructing a directed graph of cell objects in memory. A second graph, containing all of the primitives in the device and the legal connections between them, is then created based on the device specified on the command line. (As of now only the SLG46620V is supported; the SLG46621V can be added fairly easily but the SLG46140V has a slightly different microarchitecture which will require a bit more work to support.)</p><p> After the graphs are generated, each node in the netlist graph is assigned a numeric label identifying the type of cell and each node in the device graph is assigned a list of legal labels: for example, an I/O buffer site is legal for an input buffer, output buffer, or bidirectional buffer.</p><table readabilityDataTable="0"><tbody><tr><td>
<a href="https://2.bp.blogspot.com/-kIekczO693g/Vy7dBqYifXI/AAAAAAAAAxc/hMNJBs5bedIQOrBzzkhq4gbmhR-n58EQwCLcB/s1600/graph-labels.png" imageanchor="1"><img height="141" src="https://2.bp.blogspot.com/-kIekczO693g/Vy7dBqYifXI/AAAAAAAAAxc/hMNJBs5bedIQOrBzzkhq4gbmhR-n58EQwCLcB/s400/graph-labels.png" width="400"></img></a>
</td>
</tr><tr><td>Example labeling for a subset of the netlist and device graphs</td>
</tr></tbody></table><p>
The labeled nodes now need to be placed. The initial placement uses a simple greedy algorithm to create a valid (although not necessarily optimal or even routable) placement:</p><ol><li>Loop over the cells in the netlist. If any cell has a LOC constraint, which locks the cell to a specific physical site, attempt to assign the node to the specified site. If the specified node is the wrong type, doesn't exist, or is already used by another constrained node, the constraint is invalid so fail with an error.</li>
<li>Loop over all of the unconstrained cells in the netlist and assign them to the first unused site with the right label. If none are available, the design is too big for the device so fail with an error.</li>
</ol><p>
Once the design is placed, the placement optimizer then loops over the design and attempts to improve it. A simulated annealing algorithm is used, where changes to the design are accepted unconditionally if they make the placement better, and with a random, gradually decreasing probability if they make it worse. The optimizer terminates when the design receives a perfect score (indicating an optimal placement) or if it stops making progress for several iterations. Each iteration does the following:</p><ol><li>Compute a score for the current design based on the number of unroutable nets, the amount of routing congestion (number of nets crossing between halves of the device), and static timing analysis (not yet implemented, always zero).</li>
<li>Make a list of nodes that contributed to this score in some way (having some attached nets unroutable, crossing to the other half of the device, or failing timing).</li>
<li>Remove nodes from the list that are LOC'd to a specific location since we're not allowed to move them.</li>
<li>Remove nodes from the list that have only one legal placement in the device (for example, oscillator hard IP) since there's nowhere else for them to go.</li>
<li>Pick a node from the remainder of the list at random. Call this our pivot.</li>
<li>Find a list of candidate placements for the pivot: </li>
<ol><li>Consider all routable placements in the other half of the device.</li>
<li>If none were found, consider all routable placements anywhere in the device.</li>
<li>If none were found, consider all placements anywhere in the device even if they're not routable.</li>
</ol><li>Pick one of the candidates at random and move the pivot to that location. If another cell in the netlist is already there, put it in the vacant site left by the pivot.</li>
<li>Re-compute the score for the design. If it's better, accept this change and start the next iteration.</li>
<li>If the score is worse, accept it with a random probability which decreases as the iteration number goes up. If the change is not accepted, restore the previous placement.</li>
</ol><p>
After optimization, the design is checked for routability. If any edges in the netlist graph don't correspond to edges in the device graph, the user probably asked for something impossible (for example, trying to hook a flipflop's output to a comparator's reference voltage input) so fail with an error.</p><p> The design is then routed. This is quite simple due to the crossbar structure of the device. For each edge in the netlist:</p><ol><li>If dedicated (non-fabric) routing is used for this path, configure the destination's input mux appropriately and stop.</li>
<li>If the source and destination are in the same half of the device, configure the destination's input mux appropriately and stop.</li>
<li>A cross-connection must be used. Check if we already used one to bring the source signal to the other half of the device. If found, configure the destination to route from that cross-connection and stop.</li>
<li>Check if we have any cross-connections left going in this direction. If they're all used, the design is unroutable due to congestion so fail with an error.</li>
<li>Pick the next unused cross-connection and configure it to route from the source. Configure the destination to route from the cross-connection and stop.</li>
</ol><p>
Once routing is finished, run a series of post-PAR design rule checks. These currently include the following:</p><ul><li>If any node has no loads, generate a warning</li>
<li>If an I/O buffer is connected to analog hard IP, fail with an error if it's not configured in analog mode.</li>
<li>Some signals (such as comparator inputs and oscillator power-down controls) are generated by a shared mux and fed to many loads. If different loads require conflicting settings for the shared mux, fail with an error.</li>
</ul><p>
If DRC passes with no errors, configure all of the individual cells in the netlist based on the HDL parameters. Fail with an error if an invalid configuration was requested.</p><p> Finally, generate the bitstream from all of the per-cell configuration and write it to a file.</p><h2>
Great, let's get started!</h2><p>
If you don't already have one, you'll need to buy a <a href="http://www.silego.com/buy/index.php?main_page=product_info&amp;products_id=388">GreenPak4 development kit</a>. The kit includes samples of the SLG46620V (among other devices) and a programmer/emulation board. While you're waiting for it to arrive, install <a href="http://www.silego.com/softdoc/software.html">GreenPak Designer</a>.</p><p> Download and install Yosys. Although Clifford is pretty good at merging my pull requests, only <a href="https://github.com/azonenberg/yosys/">my fork on Github</a> is guaranteed to have the most up-to-date support for GreenPak devices so don't be surprised if you can't use a bleeding-edge feature with mainline Yosys.</p><p> Download and install gp4par. You can get it from <a href="https://github.com/azonenberg/openfpga/">the Github repository</a>.</p><p> Write your HDL, compile with Yosys, P&amp;R with gp4par, and import the bitstream into GreenPak Designer to program the target device. The most current gp4par manual is included in LaTeX source form in the source tree and is automatically built as part of the compile process. If you're just browsing, there's a <a href="http://thanatos.virtual.antikernel.net/unlisted/gp4-hdl.pdf">relatively recent PDF version</a> on my web server.</p><p> If you'd like to see the Verilog that produced the nightmare of a schematic I showed above, <a href="https://github.com/azonenberg/openfpga/blob/master/tests/greenpak4/Blinky/Blinky.v">here it is</a>.</p><p> Be advised that this project is still very much a work in progress and there are still a number of SLG46620V features I don't support (see the manual for exact details).</p><h2>
I love it / it segfaulted / there's a problem in the manual!</h2><p>
Hop in our IRC channel (##openfpga on Freenode) and let me know. Feedback is great, pull requests are even better,</p><h2>
You're competing with Silego's IDE. Have they found out and sued you yet?</h2><p>
Nope. They're fully aware of what I'm doing and are rolling out the red carpet for me. They love the idea of a HDL flow as an alternative to schematic entry and are pretty amazed at how fast it's coming together.</p><p> After I reported a few bugs in their datasheets they decided to skip the middleman and give me direct access to the engineer who writes their documentation so that I can get faster responses. The last time I found a problem (two different parts of the datasheet contradicted each other) an updated datasheet was in my inbox and on their website by the next day. I only wish Xilinx gave me that kind of treatment!</p><p> They've even <a href="https://twitter.com/SilegoTech/status/717018987771469824">offered me free hardware</a> to help me add support for their latest product family, although I plan to get GreenPak4 support to a more stable state before taking them up on the offer.</p><h2>
So what's next?</h2>
<p> Better testing, for starters. I have to verify functionality by hand with a DMM and oscilloscope, which is time consuming.</p><p> My contact at Silego says they're going to be giving me documentation on the SRAM emulation interface soon, so I'm going to make a hardware-in-loop test platform that connects to my desktop and the Silego ZIF socket, and lets me load new bitstreams via a scriptable interface. It'll have FPGA-based digital I/O as well as an ADC and DAC on every device pin, plus an adjustable voltage regulator for power, so I can feed in arbitrary mixed-signal test waveforms and write PC-based unit tests to verify correct behavior.</p><p> Other than that, I want to finish support for the SLG46620V in the next month or two. The SLG46621V will be an easy addition since only one pin and the relevant configuration bits have changed from the 46620 (I suspect they're the same die, just bonded out differently).</p><p> Once that's done I'll have to do some more extensive work to add the SLG46140V since the architecture is a bit different (a lot of the combinatorial logic is merged into multi-function blocks). Luckily, the 46140 has a lot in common architecturally with the GreenPak5 family, so once that's done GreenPak5 will probably be a lot easier to add support for.</p><p> My thanks go out to Clifford Wolf, whitequark, the IRC users in ##openfpga, and everyone at Silego I've worked with to help make this possible. I hope that one day this project will become mature enough that Silego will ship it as an officially supported extension to GreenPak Designer, making history by becoming the first modern programmable logic vendor to ship a fully open source synthesis and P&amp;R suite.
</p>
</div>

@ -0,0 +1,4 @@
[
"http:\/\/media.breitbart.com\/media\/2016\/11\/GettyImages-621866810.jpg",
"http:\/\/media.breitbart.com\/media\/2016\/11\/GettyImages-621866810-640x480.jpg"
]

@ -0,0 +1,8 @@
{
"Author": "by Lucas Nolan22 Dec 2016651",
"Direction": null,
"Excerpt": "Snopes fact checker and staff writer David Emery posted to Twitter asking if there were “any un-angry Trump supporters?”",
"Image": "http:\/\/media.breitbart.com\/media\/2016\/11\/GettyImages-621866810.jpg",
"Title": "'Neutral' Snopes Fact-Checker David Emery: 'Are There Any Un-Angry Trump Supporters?' - Breitbart",
"SiteName": "Breitbart"
}

@ -0,0 +1,41 @@
<div>
<figure><div><p><img alt="Supporters of Republican presidential nominee Donald Trump cheer during election night at the New York Hilton Midtown in New York on November 9, 2016. / AFP / JIM WATSON (Photo credit should read JIM WATSON/AFP/Getty Images)" height="480" itemprop="image" src="http://media.breitbart.com/media/2016/11/GettyImages-621866810-640x480.jpg" width="640"></img></p><p>JIM WATSON/AFP/Getty Images</p>
</div>
</figure> <time datetime="2016-12-22T10:43:37Z">22 Dec, 2016</time><time datetime="2016-12-22T18:59:12Z">22 Dec, 2016</time></div><div>
<div id="EmailOptin">
<p><span>SIGN UP</span> FOR OUR NEWSLETTER</p>
</div>
<h2><span>Snopes fact checker and staff writer David Emery posted to Twitter asking if there were “any un-angry Trump supporters?”</span></h2>
<p><span>Emery, a writer for partisan “fact-checking” website Snopes.com which soon will be in charge of labelling </span><a href="http://www.breitbart.com/tech/2016/12/15/facebook-introduce-warning-labels-stories-deemed-fake-news/"><span>“fake news”</span></a><span> alongside ABC News and Politifact, retweeted an article by Vulture magazine relating to the </span><a href="http://www.breitbart.com/big-hollywood/2016/11/19/boycotthamilton-trends-hamilton-cast-members-harass-mike-pence/"><span>protests</span></a><span> of the <em>Hamilton</em> musical following the decision by the cast of the show to make a </span><a href="http://www.breitbart.com/big-hollywood/2016/11/19/tolerance-hamilton-cast-lectures-mike-pence-broadway-stage/"><span>public announcement</span></a><span> to Vice-president elect Mike Pence while he watched the performance with his family.</span></p>
<div id="EmailOptinM">
<p><span>SIGN UP</span> FOR OUR NEWSLETTER</p>
</div>
<p><span>The tweet from Vulture magazine reads, “</span><a href="https://twitter.com/hashtag/Hamilton?src=hash" rel="noopener" target="_blank"><span>#Hamilton</span></a><span> Chicago show interrupted by angry Trump supporter.” Emery retweeted the story, saying, “Are there un-angry Trump supporters?”</span></p>
<p><span>This isnt the first time the Snopes.com writer has expressed anti-Trump sentiment on his Twitter page. In another tweet in which Emery links to an article that falsely attributes a quote to President-elect Trump, Emery states, “Incredibly, some people actually think they have to put words in Trumps mouth to make him look bad.”</span></p>
<p><span>Emery also retweeted an article by <em>New York</em> magazine that claimed President-elect Trump relied on lies to win during his campaign and that we now lived in a “post-truth” society. “Before long well all have forgotten what it was like to live in the same universe; or maybe we already have,” Emery tweeted.</span></p>
<p><span>Facebook believe that Emery, along with other Snopes writers, ABC News, and </span><a href="http://www.breitbart.com/tech/2016/12/16/flashback-weekly-standard-data-shows-politifact-has-it-out-for-republicans/"><span>Politifact</span></a><span> are impartial enough to label and silence what they believe to be “fake news” on social media. </span></p>
<p><i><span>Lucas Nolan is a reporter for Breitbart Tech covering issues of free speech and online censorship. Follow him on Twitter </span></i><a href="http://twitter.com/lucasnolan_" rel="noopener" target="_blank"><i><span>@LucasNolan_</span></i></a><i><span> or email him at </span></i><a href="http://www.breitbart.com/wp-admin/blank"><i><span>lnolan@breitbart.com</span></i></a></p>
</div>

File diff suppressed because one or more lines are too long

@ -0,0 +1,9 @@
[
"https:\/\/static.independent.co.uk\/s3fs-public\/thumbnails\/image\/2015\/12\/06\/10\/bed-hotel-room.jpg",
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2014\/03\/18\/10\/bandb2.jpg",
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2015\/05\/26\/11\/hotel-door-getty.jpg",
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2013\/07\/31\/15\/luggage-3.jpg",
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2015\/04\/13\/11\/Lifestyle-hotels.jpg",
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2014\/03\/13\/16\/agenda7.jpg",
"http:\/\/fakehost\/sites\/all\/themes\/ines_themes\/independent_theme\/img\/reuse.png"
]

@ -0,0 +1,8 @@
{
"Author": "Hazel Sheffield",
"Direction": null,
"Excerpt": "Most people go to hotels for the pleasure of sleeping in a giant bed with clean white sheets and waking up to fresh towels in the morning. But those towels and sheets might not be as clean as they look, according to the hotel bosses that responded to an online thread about the things hotel owners dont want you to know.",
"Image": "https:\/\/static.independent.co.uk\/s3fs-public\/thumbnails\/image\/2015\/12\/06\/10\/bed-hotel-room.jpg",
"Title": "Seven secrets that hotel owners don't want you to know",
"SiteName": "The Independent"
}

@ -0,0 +1,136 @@
<div id="gigya-share-btns-2_gig_containerParent" itemprop="articleBody">
<p>Most people go to hotels for the pleasure of sleeping in a giant bed with clean white sheets and waking up to fresh towels in the morning.</p>
<p>But those towels and sheets might not be as clean as they look, according to the hotel bosses that responded to an online thread about the things hotel owners dont want you to know.</p>
<p>Zeev Sharon and Michael Forrest Jones both run hotel start-ups in the US. Forrest Jones runs the start-up Beechmont Hotels Corporation, a hotel operating company that consults with hotel owners on how they can improve their business. Sharon is the CEO of Hotelied, a start-up that allows people to sign up for discounts at luxury hotels.</p>
<p>But even luxury hotels arent always cleaned as often as they should be.</p>
<p>Here are some of the secrets that the receptionist will never tell you when you check in, according to answers posted on <a href="https://www.quora.com/What-are-the-things-we-dont-know-about-hotel-rooms" target="_blank">Quora</a>.</p>
<div>
<div>
<p><img alt="bandb2.jpg" height="423" src="https://static.independent.co.uk/s3fs-public/styles/story_medium/public/thumbnails/image/2014/03/18/10/bandb2.jpg" title="bandb2.jpg" width="564"></img></p>
</div>
<div>
<p>Even posh hotels might not wash a blanket in between stays
</p>
</div>
</div>
<p>1. Take any blankets or duvets off the bed</p>
<p>Forrest Jones said that anything that comes into contact with any of the previous guests skin should be taken out and washed every time the room is made, but that even the fanciest hotels dont always do so. "Hotels are getting away from comforters. Blankets are here to stay, however. But some hotels are still hesitant about washing them every day if they think they can get out of it," he said.</p>
<div>
<p>Video shows bed bug infestation at New York hotel</p>
</div>
<div>
<div>
<p><img alt="hotel-door-getty.jpg" height="423" src="https://static.independent.co.uk/s3fs-public/styles/story_medium/public/thumbnails/image/2015/05/26/11/hotel-door-getty.jpg" title="hotel-door-getty.jpg" width="564"></img></p>
</div>
<div>
<p>Forrest Jones advised stuffing the peep hole with a strip of rolled up notepaper when not in use.
</p>
</div>
</div>
<p>2. Check the peep hole has not been tampered with</p>
<p>This is not common, but can happen, Forrest Jones said. He advised stuffing the peep hole with a strip of rolled up notepaper when not in use. When someone knocks on the door, the paper can be removed to check who is there. If no one is visible, he recommends calling the front desk immediately. “I look forward to the day when I can tell you to choose only hotels where every employee who has access to guestroom keys is subjected to a complete public records background check, prior to hire, and every year or two thereafter. But for now, I can't,” he said.</p>
<div>
<div>
<p><img alt="luggage-3.jpg" height="423" src="https://static.independent.co.uk/s3fs-public/styles/story_medium/public/thumbnails/image/2013/07/31/15/luggage-3.jpg" title="luggage-3.jpg" width="564"></img></p>
</div>
</div>
<p>3. Dont use a wooden luggage rack</p>
<p>Bedbugs love wood. Even though a wooden luggage rack might look nicer and more expensive than a metal one, its a breeding ground for bugs. Forrest Jones says guests should put the items they plan to take from bags on other pieces of furniture and leave the bag on the floor.</p>
<div>
<div>
<p><img alt="Lifestyle-hotels.jpg" height="423" src="https://static.independent.co.uk/s3fs-public/styles/story_medium/public/thumbnails/image/2015/04/13/11/Lifestyle-hotels.jpg" title="Lifestyle-hotels.jpg" width="564"></img></p>
</div>
<div>
<p>The old rule of thumb is that for every 00 invested in a room, the hotel should charge in average daily rate
</p>
</div>
</div>
<p>4. Hotel rooms are priced according to how expensive they were to build</p>
<p>Zeev Sharon said that the old rule of thumb is that for every $1000 invested in a room, the hotel should charge $1 in average daily rate. So a room that cost $300,000 to build, should sell on average for $300/night.</p>
<h3>5. Beware the wall-mounted hairdryer</h3>
<p>It contains the most germs of anything in the room. Other studies have said the TV remote and bedside lamp switches are the most unhygienic. “Perhaps because it's something that's easy for the housekeepers to forget to check or to squirt down with disinfectant,” Forrest Jones said.</p>
<div>
<div>
<div data-scald-gallery="3739501">
<h2><span></span>Business news in pictures</h2>
</div>
</div>
</div>
<h3>6. Mini bars almost always lose money</h3>
<p>Despite the snacks in the minibar seeming like the most overpriced food you have ever seen, hotel owners are still struggling to make a profit from those snacks. "Minibars almost always lose money, even when they charge $10 for a Diet Coke,” Sharon said.</p>
<div>
<div>
<p><img alt="agenda7.jpg" height="423" src="https://static.independent.co.uk/s3fs-public/styles/story_medium/public/thumbnails/image/2014/03/13/16/agenda7.jpg" title="agenda7.jpg" width="564"></img></p>
</div>
<div>
<p>Towels should always be cleaned between stays
</p>
</div>
</div>
<p>7. Always made sure the hand towels are clean when you arrive</p>
<p>Forrest Jones made a discovery when he was helping out with the housekeepers. “You know where you almost always find a hand towel in any recently-vacated hotel room that was occupied by a guy? On the floor, next to the bed, about halfway down, maybe a little toward the foot of the bed. Same spot in the floor, next to almost every bed occupied by a man, in every room. I'll leave the rest to your imagination,” he said.</p>
<meta content="2016-05-08T10:11:51+01:00" itemprop="datePublished"></meta><ul><li>
More about:
</li>
<li><a href="http://fakehost/topic/Hotels" itemprop="keywords">Hotels</a></li>
<li><a href="http://fakehost/topic/Hygiene" itemprop="keywords">Hygiene</a></li>
</ul>
<p><a href="http://fakehost/syndication/reuse-permision-form?url=http://www.independent.co.uk/news/business/news/seven-secrets-that-hotel-owners-dont-want-you-to-know-10506160.html" target="_blank"><img src="http://fakehost/sites/all/themes/ines_themes/independent_theme/img/reuse.png" width="25"></img>Reuse content</a>
</p></div>

File diff suppressed because one or more lines are too long

@ -0,0 +1,5 @@
[
"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/22\/5\/campaign_images\/webdr03\/student-dies-after-diet-pills-she-bought-online-b-2-28712-1429696299-24_dblbig.jpg",
"http:\/\/ak-hdl.buzzfed.com\/static\/2015-04\/21\/5\/enhanced\/webdr12\/grid-cell-2501-1429608056-15.jpg",
"http:\/\/ak-hdl.buzzfed.com\/static\/2015-04\/21\/5\/enhanced\/webdr12\/grid-cell-2501-1429608057-18.jpg"
]

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save