pull latest readability-php via composer
parent
1baf8c5217
commit
cf1ede0ba8
@ -0,0 +1,3 @@
|
|||||||
|
# Coveralls upload settings.
# CI service name reported with the upload.
service_name: travis-ci
# Clover XML coverage report to read (relative to the repository root).
coverage_clover: test/clover.xml
# Location where the generated Coveralls JSON payload is written.
json_path: test/coveralls-upload.json
|
@ -0,0 +1 @@
|
|||||||
|
test/* linguist-language=PHP
|
@ -0,0 +1,4 @@
|
|||||||
|
.idea/
|
||||||
|
vendor
|
||||||
|
composer.lock
|
||||||
|
/test.*
|
@ -0,0 +1,13 @@
|
|||||||
|
# StyleCI configuration: begin with the "recommended" preset and adjust
# individual fixers through the two lists below.
preset: recommended

# Fixers explicitly turned on.
enabled:
  - concat_with_spaces
  - unalign_double_arrow

# Fixers explicitly turned off. The first two are the counterparts of the
# fixers enabled above.
disabled:
  - concat_without_spaces
  - align_double_arrow
  - simplified_null_return
  - cast_spaces
  - trailing_comma_in_multiline_array
  - phpdoc_align
|
@ -0,0 +1,48 @@
|
|||||||
|
# Travis CI build definition. Every job runs the PHPUnit suite inside a Docker
# image built from ./docker/php/Dockerfile for one PHP/libxml combination.
language: bash
services: docker

# One job per row; both variables are forwarded to `docker build` as build args.
env:
  matrix:
    - PHP_VERSION=7.3 LIBXML_VERSION=2.9.9
    - PHP_VERSION=7.3 LIBXML_VERSION=2.9.8
    - PHP_VERSION=7.3 LIBXML_VERSION=2.9.7
    - PHP_VERSION=7.3 LIBXML_VERSION=2.9.6
    - PHP_VERSION=7.3 LIBXML_VERSION=2.9.5
    - PHP_VERSION=7.3 LIBXML_VERSION=2.9.4
    - PHP_VERSION=7.2 LIBXML_VERSION=2.9.9
    - PHP_VERSION=7.2 LIBXML_VERSION=2.9.8
    - PHP_VERSION=7.2 LIBXML_VERSION=2.9.7
    - PHP_VERSION=7.2 LIBXML_VERSION=2.9.6
    - PHP_VERSION=7.2 LIBXML_VERSION=2.9.5
    - PHP_VERSION=7.2 LIBXML_VERSION=2.9.4
    - PHP_VERSION=7.1 LIBXML_VERSION=2.9.9
    - PHP_VERSION=7.1 LIBXML_VERSION=2.9.8
    - PHP_VERSION=7.1 LIBXML_VERSION=2.9.7
    - PHP_VERSION=7.1 LIBXML_VERSION=2.9.6
    - PHP_VERSION=7.1 LIBXML_VERSION=2.9.5
    - PHP_VERSION=7.1 LIBXML_VERSION=2.9.4
    - PHP_VERSION=7.0 LIBXML_VERSION=2.9.9
    - PHP_VERSION=7.0 LIBXML_VERSION=2.9.8
    - PHP_VERSION=7.0 LIBXML_VERSION=2.9.7
    - PHP_VERSION=7.0 LIBXML_VERSION=2.9.6
    - PHP_VERSION=7.0 LIBXML_VERSION=2.9.5
    - PHP_VERSION=7.0 LIBXML_VERSION=2.9.4

matrix:
  # Travis matches an allow_failures `env` value against the complete env
  # string of a job, so a bare `LIBXML_VERSION=x` matches none of the rows
  # above. Each entry therefore repeats the full row. Jobs on libxml 2.9.4 are
  # deliberately absent: those are the ones that must pass.
  # NOTE(review): exact-match behaviour taken from the Travis build-matrix
  # docs - confirm on the next build that these jobs show as "allowed failure".
  allow_failures:
    - env: PHP_VERSION=7.3 LIBXML_VERSION=2.9.9
    - env: PHP_VERSION=7.3 LIBXML_VERSION=2.9.8
    - env: PHP_VERSION=7.3 LIBXML_VERSION=2.9.7
    - env: PHP_VERSION=7.3 LIBXML_VERSION=2.9.6
    - env: PHP_VERSION=7.3 LIBXML_VERSION=2.9.5
    - env: PHP_VERSION=7.2 LIBXML_VERSION=2.9.9
    - env: PHP_VERSION=7.2 LIBXML_VERSION=2.9.8
    - env: PHP_VERSION=7.2 LIBXML_VERSION=2.9.7
    - env: PHP_VERSION=7.2 LIBXML_VERSION=2.9.6
    - env: PHP_VERSION=7.2 LIBXML_VERSION=2.9.5
    - env: PHP_VERSION=7.1 LIBXML_VERSION=2.9.9
    - env: PHP_VERSION=7.1 LIBXML_VERSION=2.9.8
    - env: PHP_VERSION=7.1 LIBXML_VERSION=2.9.7
    - env: PHP_VERSION=7.1 LIBXML_VERSION=2.9.6
    - env: PHP_VERSION=7.1 LIBXML_VERSION=2.9.5
    - env: PHP_VERSION=7.0 LIBXML_VERSION=2.9.9
    - env: PHP_VERSION=7.0 LIBXML_VERSION=2.9.8
    - env: PHP_VERSION=7.0 LIBXML_VERSION=2.9.7
    - env: PHP_VERSION=7.0 LIBXML_VERSION=2.9.6
    - env: PHP_VERSION=7.0 LIBXML_VERSION=2.9.5

# Install Composer dependencies using the `composer` Docker image, with the
# checkout mounted at /app.
install:
  - docker run --rm --volume $PWD:/app --workdir="/app" composer install

# Build the image for this job's PHP/libxml pair (Dockerfile fed on stdin, so
# there is no build context), then run PHPUnit and write a Clover report.
script:
  - docker build --build-arg PHP_VERSION=${PHP_VERSION} --build-arg LIBXML_VERSION=${LIBXML_VERSION} -t travis-build - < ./docker/php/Dockerfile
  - docker run --volume $PWD:/app --workdir="/app" travis-build php ./vendor/bin/phpunit --coverage-clover /app/test/clover.xml

# Add php-coveralls and run it inside the test image; TRAVIS and TRAVIS_JOB_ID
# are passed through explicitly because the container does not inherit them.
after_script:
  - docker run --volume $PWD:/app --workdir="/app" composer require php-coveralls/php-coveralls:^2.0
  - docker run --volume $PWD:/app --workdir="/app" --env TRAVIS=${TRAVIS} --env TRAVIS_JOB_ID=${TRAVIS_JOB_ID} travis-build php ./vendor/php-coveralls/php-coveralls/bin/php-coveralls -v
|
@ -0,0 +1,13 @@
|
|||||||
|
# Authors
|
||||||
|
|
||||||
|
Readability.php developed by **Andres Rey**.
|
||||||
|
|
||||||
|
Based on Arc90's readability.js (1.7.1) script available at: http://code.google.com/p/arc90labs-readability.
|
||||||
|
Copyright (c) 2010 Arc90 Inc
|
||||||
|
|
||||||
|
The AUTHORS/Contributors are (and/or have been):
|
||||||
|
|
||||||
|
* Andres Rey
|
||||||
|
* Sergiy Lavryk
|
||||||
|
* Pedro Amorim
|
||||||
|
* Malu Decks
|
@ -0,0 +1,145 @@
|
|||||||
|
# Change Log
|
||||||
|
All notable changes to this project will be documented in this file.
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
## [v2.1.0](https://github.com/andreskrey/readability.php/releases/tag/v2.1.0)
|
||||||
|
- Avoid overwriting extracted metadata with similarly named keys (like `og:image` and `og:image:width`)
|
||||||
|
- Imported new `getSiteName()` feature from JS version as of [21 Dec 2018](https://github.com/mozilla/readability/pull/504)
|
||||||
|
- Added getFirstElementChild function to NodeTrait + test case (Issue #83)
|
||||||
|
- Reworked the test suite to use TestPage objects and give more hints about what failed
|
||||||
|
- Removed getWordThreshold and setWordThreshold configuration functions
|
||||||
|
- Added NodeUtility::filterTextNodes and deprecated NodeTrait getChildren()
|
||||||
|
- Added new DOMNodeList fake class that mimics the original DOMNodeList class but allows adding new nodes to the list
|
||||||
|
- Added new Dockerfiles that pull different versions of PHP and libxml. Now we are supporting 4 versions of PHP and 6 versions of libxml!
|
||||||
|
|
||||||
|
## [v2.0.1](https://github.com/andreskrey/readability.php/releases/tag/v2.0.1)
|
||||||
|
- Fixed small issue that prevented the main image from showing up in the results
|
||||||
|
|
||||||
|
## [v2.0.0](https://github.com/andreskrey/readability.php/releases/tag/v2.0.0)
|
||||||
|
|
||||||
|
- [BREAKING CHANGE] Bumped the minimum supported version of PHP to 7.0
|
||||||
|
- Clean `<aside>` tags during `prepArticle()`.
|
||||||
|
- Merged PR #58: Fix notice non-object on $parentOfTopCandidate for tumblr.com
|
||||||
|
- Fixed issue #63: Division by zero
|
||||||
|
- Housekeeping:
|
||||||
|
- Removed $parseSuccessful flag that wasn't needed anymore
|
||||||
|
- Rename wordThreshold to charThreshold and throw deprecation notices. WordThreshold will be removed in version 3.0.
|
||||||
|
- Added "-ad-" as unlikely candidate
|
||||||
|
- Added Docker containers with PHP 7.0, 7.1, and 7.2 and makefile to trigger the tests.
|
||||||
|
- Imported new code from the JS version as of [19 Nov 2018](https://github.com/mozilla/readability/commit/876c81f710711ba2afb36dd83889d4c5b4fc2743), which includes the following changes:
|
||||||
|
- Move phrasing contents [into paragraphs](https://github.com/mozilla/readability/commit/9f2c5cb42ee9635f091178271d66888cbb47e5dc)
|
||||||
|
- Improved the title detection
|
||||||
|
- Remove [single cell tables](https://github.com/mozilla/readability/commit/ea4165721f9105d8f1e53cfecdcfdafceaf3e4bf)
|
||||||
|
- Improved the detection of video related elements
|
||||||
|
- New test cases
|
||||||
|
- Various minor fixes
|
||||||
|
|
||||||
|
|
||||||
|
## [v1.2.0](https://github.com/andreskrey/readability.php/releases/tag/v1.2.0)
|
||||||
|
|
||||||
|
- Merged PR#49 (Missing object when calling `->getContent()`)
|
||||||
|
- Imported all changes from Readability.js as of 2 March 2018 ([8525c6a](https://github.com/mozilla/readability/commit/8525c6af36d3badbe27c4672a6f2dd99ddb4097f)):
|
||||||
|
- Check for `<base>` elements before converting URLs to absolute.
|
||||||
|
- Clean `<link>` tags on `prepArticle()`
|
||||||
|
- Attempt to return at least some text if all the algorithm runs fail (Check PR [#423](https://github.com/mozilla/readability/pull/423) on JS version)
|
||||||
|
- Add new test cases for the previous changes
|
||||||
|
- And all other changes reflected [in this diff](https://github.com/mozilla/readability/compare/c3ff1a2d2c94c1db257b2c9aa88a4b8fbeb221c5...8525c6af36d3badbe27c4672a6f2dd99ddb4097f)
|
||||||
|
|
||||||
|
## [v1.1.1](https://github.com/andreskrey/readability.php/releases/tag/v1.1.1)
|
||||||
|
|
||||||
|
- Switched from assertEquals to assertSame on unit testing to avoid weak comparisons.
|
||||||
|
- Added a safe check to avoid sending the DOMDocument as a node when scanning for node ancestors.
|
||||||
|
- Fix issue #45: Small mistake in documentation
|
||||||
|
- Fix issue #46: Added `data-src` as an image source path
|
||||||
|
- Fixed bug when extracting all the images of the article (Was extracting images from the original DOM instead of the parsed one)
|
||||||
|
- Added the `->getDOMDocument()` getter to retrieve the fully parsed DOMDocument
|
||||||
|
- Merged PR #48 that allows passing an array as configuration (@topotru)
|
||||||
|
|
||||||
|
## [v1.1.0](https://github.com/andreskrey/readability.php/releases/tag/v1.1.0)
|
||||||
|
|
||||||
|
- Added 'data-orig' as a URL source for images
|
||||||
|
- Removed 'modal' as a negative property from classes
|
||||||
|
- Added option to inject a logger
|
||||||
|
- Removed all references to the `data-readability` tags that don't apply anymore to the new structure
|
||||||
|
- Merged PR #38 (Missing DOMEntityReference)
|
||||||
|
|
||||||
|
## [v1.0.0](https://github.com/andreskrey/readability.php/releases/tag/v1.0.0)
|
||||||
|
|
||||||
|
- Node encapsulation is gone. Pre v1 all nodes were encapsulated in a Readability class, which created lots of trouble with dependencies, responsibilities, and properties. Now all the encapsulation is gone: all the DOMNodes inside the Readability class are extensions of the original DOM classes, which allows the system to take advantage of the functions and properties of DOMDocument.
|
||||||
|
- HTMLParser is gone, Readability is the new main class. Switched things a bit for this release. Pre v1 you had to create an HTMLParser class to parse the HTML. Now you have to create a Readability class, feed it the text, and check the result.
|
||||||
|
- No more dumb arrays as a result. If you want to get the title, content, images, or anything else you'll have to use the getters of the Readability class.
|
||||||
|
- Environment class is gone. Now you have to create a configuration class and use setters to set your configuration options.
|
||||||
|
- Exceptions. Make sure you wrap your Readability class in a try catch block, because if it fails to parse your HTML, it will throw a `ParseException`.
|
||||||
|
- Minimum PHP version bumped to 5.6.
|
||||||
|
|
||||||
|
## [v0.3.1](https://github.com/andreskrey/readability.php/releases/tag/v0.3.1)
|
||||||
|
|
||||||
|
- Trim titles when detecting hierarchical separators to avoid false negatives on strings with spaces.
|
||||||
|
- Fix issue when converting divs to p nodes and never rating them (issue #29)
|
||||||
|
- Fix "Unsupported operand types" (PR #31)
|
||||||
|
- Fix division by zero when no title was found (issue #32)
|
||||||
|
- New function to retrieve all images at once (PR #30)
|
||||||
|
- Get the title from the `<title>` tag before searching on the `<meta>` tags
|
||||||
|
|
||||||
|
## [v0.3.0](https://github.com/andreskrey/readability.php/releases/tag/v0.3.0)
|
||||||
|
|
||||||
|
- Merged PR #24. Fixes notice when trying to extract `og:image`
|
||||||
|
- Up to date to commit [eb221c5](https://github.com/mozilla/readability/commit/c3ff1a2d2c94c1db257b2c9aa88a4b8fbeb221c5) (2017-10-16), which includes the following changes:
|
||||||
|
- New tags added to the unlikelyCandidates regex
|
||||||
|
- Detection and removal of hierarchical separators in titles
|
||||||
|
- Added more tags to clean after parsing the article (`button`, `textarea`, `select`, etc.)
|
||||||
|
- New way to detect empty nodes (including an edge case where a node with a `&nbsp;` was detected as a node with content)
|
||||||
|
- Better approach to find a top candidate (especially when a top candidate is the only child of a parent node, which allows a more accurate joining of sibling elements)
|
||||||
|
- Detect text direction (`ltr` or `rtl`)
|
||||||
|
- Detect and mark data tables to avoid removing them during final clean up
|
||||||
|
- Major fixes when scanning and deleting nodes (no need to traverse backwards anymore)
|
||||||
|
- Node cleaning via regex matches
|
||||||
|
- Clean table attributes during final clean up.
|
||||||
|
- Added license
|
||||||
|
|
||||||
|
Next release after this one will be v1 and will be a major refactor around Readability and HTMLParser methods and responsibilities.
|
||||||
|
|
||||||
|
## [v0.2.2](https://github.com/andreskrey/readability.php/releases/tag/v0.2.2)
|
||||||
|
|
||||||
|
- Added a safecheck for really nasty HTML
|
||||||
|
- Added summonCthulhu option, to remove all script tags via regex
|
||||||
|
|
||||||
|
## [v0.2.1](https://github.com/andreskrey/readability.php/releases/tag/v0.2.1)
|
||||||
|
|
||||||
|
- Added `normalizeEntities` flag to convert UTF-8 characters to their HTML entity equivalents. Fixes bugs on HTML documents with mixed encoding.
|
||||||
|
- Added more information to the readme.md file
|
||||||
|
- New way to create a backup DOM: not creating a backup. In the previous version, the system cloned the $this->dom object to keep it as a backup in order to restart the algorithm with other flags, if needed. This seemed to work until I realized that *sometimes* the backup changes even if we are not touching it. Seems that the `dom` and `backupdom` objects are linked and *some* changes on the dom object reach the backupdom object. The new approach consists of deleting the backupdom object and recreating the dom object from scratch. Of course this has a performance impact, but it seems to be quite low.
|
||||||
|
|
||||||
|
## [v0.2.0](https://github.com/andreskrey/readability.php/releases/tag/v0.2.0)
|
||||||
|
|
||||||
|
100% complete port of Readability.js!
|
||||||
|
- Every test unit passes
|
||||||
|
- Readability.php produces the same exact output as Readability.js
|
||||||
|
- I'm happy :)
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- Lots of bugs
|
||||||
|
- Merged PR by DavidFricker to avoid exceptions while grabbing the document content
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- substituteEntities flag, to avoid replacing special characters with HTML entities. There's nothing we can do about `&nbsp;`, that entity is replaced by libxml and there's no way to disable it.
|
||||||
|
- Named data sets so it's easier to detect which test case is failing.
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
|
||||||
|
- Couple of test cases that involved broken JS. There's nothing we can do about JS spilling onto the text.
|
||||||
|
|
||||||
|
## [0.0.3-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.3v-alpha)
|
||||||
|
|
||||||
|
We are getting closer to being a 100% complete port of Readability.js!
|
||||||
|
- Added prepArticle to remove junk after selecting the top candidates.
|
||||||
|
- Added a function to restore score after selecting top candidates. This basically works by scanning the data-readability tag and restoring the score to the contentScore variable. This is a horrible hack and should be removed once we ditch the Element interface of html-to-markdown and start extending the DOMDocument object.
|
||||||
|
- Switched all strlen functions to mb_strlen
|
||||||
|
- Fixed lots of bugs and pretty sure that introduced a bunch of new ones.
|
||||||
|
|
||||||
|
## [0.0.2-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.2-alpha)
|
||||||
|
- Last version I'm using master as the main development branch. All unreleased changes and main development will happen in the develop branch.
|
||||||
|
|
||||||
|
## [0.0.1-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.1-alpha)
|
||||||
|
- Initial release
|
@ -0,0 +1,30 @@
|
|||||||
|
# Contributing
|
||||||
|
|
||||||
|
Contributions are **welcome** and will be fully **credited**.
|
||||||
|
|
||||||
|
We accept contributions via Pull Requests on [GitHub](https://github.com/andreskrey/readability.php/).
|
||||||
|
|
||||||
|
|
||||||
|
## Pull Requests
|
||||||
|
|
||||||
|
- **Document any change in behaviour** - Make sure the `README.md` and any other relevant documentation are kept up-to-date.
|
||||||
|
|
||||||
|
- **Add tests!** - Your patch won't be accepted if it doesn't have tests.
|
||||||
|
|
||||||
|
- **Create feature branches** - Don't ask us to pull from your master branch.
|
||||||
|
|
||||||
|
- **One pull request per feature** - If you want to do more than one thing, send multiple pull requests.
|
||||||
|
|
||||||
|
- **Send coherent history** - Make sure each individual commit in your pull request is meaningful. If you had to make multiple intermediate commits while developing, please [squash them](http://www.git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Changing-Multiple-Commit-Messages) before submitting.
|
||||||
|
|
||||||
|
- **Don't forget to add yourself to AUTHORS.md** - If you want to be credited, make sure you add your information (whatever you want to include) in `AUTHORS.md`.
|
||||||
|
|
||||||
|
|
||||||
|
## Running Tests
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
$ make test-all #requires docker and docker-compose
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
**Happy coding**!
|
@ -0,0 +1,201 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
@ -0,0 +1,30 @@
|
|||||||
|
# Test targets that run PHPUnit inside the docker-compose PHP/libxml services.
#
# None of these targets produce a file, so all of them are declared phony;
# otherwise a stray file called e.g. `start` or `stop` in the repository root
# would turn the target into a silent no-op.
.PHONY: test-all test-7.3 test-7.2 test-7.1 test-7.0 start stop test-all-versions

# Default goal: start the libxml 2.9.9 containers, run the suite on each
# supported PHP version, then stop the containers.
test-all: start test-7.3 test-7.2 test-7.1 test-7.0 stop

test-7.3:
	docker-compose exec php-7.3-libxml-2.9.9 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml

test-7.2:
	docker-compose exec php-7.2-libxml-2.9.9 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml

test-7.1:
	docker-compose exec php-7.1-libxml-2.9.9 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml

test-7.0:
	docker-compose exec php-7.0-libxml-2.9.9 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml

# Bring up (detached) the four services used by test-all.
start:
	docker-compose up -d php-7.3-libxml-2.9.9 php-7.2-libxml-2.9.9 php-7.1-libxml-2.9.9 php-7.0-libxml-2.9.9

stop:
	docker-compose stop

# Full matrix: every PHP version against every libxml version.
# The whole recipe is one shell invocation so that:
#   - a failing PHPUnit run is remembered in `status` instead of being
#     discarded when the loop moves on to the next combination;
#   - the remaining combinations still run after a failure;
#   - `docker-compose stop` always executes before the recipe reports failure.
test-all-versions:
	status=0; \
	for php_version in 7.0 7.1 7.2 7.3; do \
		for libxml_version in 2.9.4 2.9.5 2.9.6 2.9.7 2.9.8 2.9.9; do \
			docker-compose up -d php-$$php_version-libxml-$$libxml_version; \
			docker-compose exec php-$$php_version-libxml-$$libxml_version php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml || status=1; \
		done; \
	done; \
	docker-compose stop; \
	exit $$status
|
@ -0,0 +1,37 @@
|
|||||||
|
{
|
||||||
|
"name": "andreskrey/readability.php",
|
||||||
|
"type": "library",
|
||||||
|
"description": "A PHP port of Readability.js",
|
||||||
|
"keywords": ["readability", "html"],
|
||||||
|
"homepage": "https://github.com/andreskrey/readability.php",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Andres Rey",
|
||||||
|
"email": "andreskrey@gmail.com",
|
||||||
|
"role": "Lead Developer"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"andreskrey\\Readability\\": "src/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"autoload-dev": {
|
||||||
|
"psr-4": {"andreskrey\\Readability\\Test\\": "test"}
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"php": ">=7.0.0",
|
||||||
|
"ext-dom": "*",
|
||||||
|
"ext-xml": "*",
|
||||||
|
"ext-mbstring": "*",
|
||||||
|
"psr/log": "^1.0"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"phpunit/phpunit": "^6.5",
|
||||||
|
"monolog/monolog": "^1.24"
|
||||||
|
},
|
||||||
|
"suggest": {
|
||||||
|
"monolog/monolog": "Allow logging debug information"
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,196 @@
|
|||||||
|
version: '3'

# One service per PHP x libxml combination. Every service reuses the volumes
# and tty settings of the first one through the `template` anchor and only
# overrides the build arguments that select the base image.
# Version numbers are quoted on purpose: an unquoted 7.0 is a YAML float and
# may be rendered as "7" when it is turned back into a build argument.
services:
  php-7.0-libxml-2.9.4: &template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.4'
        PHP_VERSION: '7.0'
    volumes:
      - ./:/app
    tty: true

  php-7.0-libxml-2.9.5:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.5'
        PHP_VERSION: '7.0'

  php-7.0-libxml-2.9.6:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.6'
        PHP_VERSION: '7.0'

  php-7.0-libxml-2.9.7:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.7'
        PHP_VERSION: '7.0'

  php-7.0-libxml-2.9.8:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.8'
        PHP_VERSION: '7.0'

  php-7.0-libxml-2.9.9:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.9'
        PHP_VERSION: '7.0'

  php-7.1-libxml-2.9.4:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.4'
        PHP_VERSION: '7.1'

  php-7.1-libxml-2.9.5:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.5'
        PHP_VERSION: '7.1'

  php-7.1-libxml-2.9.6:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.6'
        PHP_VERSION: '7.1'

  php-7.1-libxml-2.9.7:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.7'
        PHP_VERSION: '7.1'

  php-7.1-libxml-2.9.8:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.8'
        PHP_VERSION: '7.1'

  php-7.1-libxml-2.9.9:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.9'
        PHP_VERSION: '7.1'

  php-7.2-libxml-2.9.4:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.4'
        PHP_VERSION: '7.2'

  php-7.2-libxml-2.9.5:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.5'
        PHP_VERSION: '7.2'

  php-7.2-libxml-2.9.6:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.6'
        PHP_VERSION: '7.2'

  php-7.2-libxml-2.9.7:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.7'
        PHP_VERSION: '7.2'

  php-7.2-libxml-2.9.8:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.8'
        PHP_VERSION: '7.2'

  php-7.2-libxml-2.9.9:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.9'
        PHP_VERSION: '7.2'

  php-7.3-libxml-2.9.4:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.4'
        PHP_VERSION: '7.3'

  php-7.3-libxml-2.9.5:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.5'
        PHP_VERSION: '7.3'

  php-7.3-libxml-2.9.6:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.6'
        PHP_VERSION: '7.3'

  php-7.3-libxml-2.9.7:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.7'
        PHP_VERSION: '7.3'

  php-7.3-libxml-2.9.8:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.8'
        PHP_VERSION: '7.3'

  php-7.3-libxml-2.9.9:
    <<: *template
    build:
      context: ./docker/php
      args:
        LIBXML_VERSION: '2.9.9'
        PHP_VERSION: '7.3'
|
@ -0,0 +1,8 @@
|
|||||||
|
# Build arguments declared before FROM so they can select the base image tag.
ARG PHP_VERSION
ARG LIBXML_VERSION
FROM andreskrey/php-${PHP_VERSION}:libxml-${LIBXML_VERSION}

RUN pecl install xdebug && docker-php-ext-enable xdebug

# Required by coveralls
# Refresh the package index in the same layer as the install so the step does
# not depend on whatever (possibly absent or stale) lists the base image ships,
# then drop the lists again to keep the layer small.
RUN apt-get update \
    && apt-get install git -y \
    && rm -rf /var/lib/apt/lists/*
|
@ -0,0 +1,16 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- PHPUnit configuration: Composer's autoloader is the bootstrap file and a
     failing or erroring test does not abort the remaining tests. -->
<phpunit bootstrap="vendor/autoload.php"
         colors="true"
         stopOnFailure="false"
         stopOnError="false">
    <!-- Tests are collected from the test/ directory. -->
    <testsuites>
        <testsuite name="Readability.php Test Suite">
            <directory>./test/</directory>
        </testsuite>
    </testsuites>
    <!-- Only the PHP files under src/ are whitelisted in the filter. -->
    <filter>
        <whitelist>
            <directory suffix=".php">src/</directory>
        </whitelist>
    </filter>
</phpunit>
|
@ -0,0 +1,92 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace andreskrey\Readability\Test;
|
||||||
|
|
||||||
|
use andreskrey\Readability\Configuration;
|
||||||
|
use Monolog\Handler\NullHandler;
|
||||||
|
use Monolog\Logger;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class ConfigurationTest.
|
||||||
|
*/
|
||||||
|
class ConfigurationTest extends \PHPUnit\Framework\TestCase
{
    /**
     * Options passed to the constructor must be readable back through the getters.
     *
     * @dataProvider getParams
     *
     * @param array $params
     */
    public function testConfigurationConstructorSetsParameters(array $params)
    {
        $config = new Configuration($params);
        $this->doEqualsAsserts($config, $params);
    }

    /**
     * An option the Configuration does not know must not show up in its array form.
     *
     * @dataProvider getParams
     *
     * @param array $params
     */
    public function testInvalidParameterIsNotInConfig(array $params)
    {
        $config = new Configuration($params);
        $this->assertArrayNotHasKey('invalidParameter', $config->toArray(), 'Invalid param key is not present in config');
    }

    /**
     * Compare every supported option with the value returned by its getter.
     *
     * @param Configuration $config  Configuration built from $options
     * @param array         $options Raw options as produced by getParams()
     */
    private function doEqualsAsserts(Configuration $config, array $options)
    {
        $this->assertEquals($options['maxTopCandidates'], $config->getMaxTopCandidates());
        $this->assertEquals($options['charThreshold'], $config->getCharThreshold());
        $this->assertEquals($options['articleByLine'], $config->getArticleByLine());
        $this->assertEquals($options['stripUnlikelyCandidates'], $config->getStripUnlikelyCandidates());
        $this->assertEquals($options['cleanConditionally'], $config->getCleanConditionally());
        $this->assertEquals($options['weightClasses'], $config->getWeightClasses());
        $this->assertEquals($options['fixRelativeURLs'], $config->getFixRelativeURLs());
        $this->assertEquals($options['substituteEntities'], $config->getSubstituteEntities());
        $this->assertEquals($options['normalizeEntities'], $config->getNormalizeEntities());
        $this->assertEquals($options['originalURL'], $config->getOriginalURL());
        // Previously compared against getOriginalURL(), which only passed because
        // both fixture values are the same string and never exercised the option.
        // NOTE(review): assumes Configuration exposes getSummonCthulhu() like the
        // other options - confirm against the Configuration class.
        $this->assertEquals($options['summonCthulhu'], $config->getSummonCthulhu());
    }

    /**
     * Data provider: a single data set with the supported options plus one
     * key ('invalidParameter') used by testInvalidParameterIsNotInConfig().
     *
     * @return array
     */
    public function getParams()
    {
        return [[
            'All current parameters' => [
                'maxTopCandidates' => 3,
                'wordThreshold' => 500,
                'charThreshold' => 500,
                'articleByLine' => true,
                'stripUnlikelyCandidates' => false,
                'cleanConditionally' => false,
                'weightClasses' => false,
                'fixRelativeURLs' => true,
                'substituteEntities' => true,
                'normalizeEntities' => true,
                'originalURL' => 'my.original.url',
                'summonCthulhu' => 'my.original.url',
                'invalidParameter' => 'invalidParameterValue'
            ]
        ]];
    }

    /**
     * Test if a logger interface can be injected and retrieved from the Configuration object.
     */
    public function testLoggerCanBeInjected()
    {
        $configuration = new Configuration();
        $log = new Logger('Readability');
        $log->pushHandler(new NullHandler());

        $configuration->setLogger($log);

        $this->assertSame($log, $configuration->getLogger());
    }
}
|
@ -0,0 +1,148 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace andreskrey\Readability\Test;
|
||||||
|
|
||||||
|
use andreskrey\Readability\Configuration;
|
||||||
|
use andreskrey\Readability\ParseException;
|
||||||
|
use andreskrey\Readability\Readability;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class ReadabilityTest.
|
||||||
|
*/
|
||||||
|
class ReadabilityTest extends \PHPUnit\Framework\TestCase
{
    /**
     * Test that Readability parses the HTML correctly and matches the expected result.
     *
     * @dataProvider getSamplePages
     *
     * @param TestPage $testPage
     *
     * @throws ParseException
     */
    public function testReadabilityParsesHTML(TestPage $testPage)
    {
        $options = ['OriginalURL' => 'http://fakehost/test/test.html',
            'FixRelativeURLs' => true,
            'SubstituteEntities' => true,
            'ArticleByLine' => true
        ];

        // Options listed above take precedence over the per-page config.json.
        $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));

        $readability = new Readability($configuration);
        $readability->parse($testPage->getSourceHTML());

        $this->assertSame($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.');
    }

    /**
     * Test that the metadata extracted by Readability (author, direction,
     * excerpt, image and title) matches the expected metadata of the page.
     *
     * @dataProvider getSamplePages
     *
     * @param TestPage $testPage
     *
     * @throws ParseException
     */
    public function testReadabilityParsesMetadata(TestPage $testPage)
    {
        $options = ['OriginalURL' => 'http://fakehost/test/test.html',
            'FixRelativeURLs' => true,
            'SubstituteEntities' => true,
            'ArticleByLine' => true
        ];

        $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));

        $readability = new Readability($configuration);
        $readability->parse($testPage->getSourceHTML());

        $this->assertSame($testPage->getExpectedMetadata()->Author, $readability->getAuthor(), 'Parsed Author does not match expected value.');
        $this->assertSame($testPage->getExpectedMetadata()->Direction, $readability->getDirection(), 'Parsed Direction does not match expected value.');
        $this->assertSame($testPage->getExpectedMetadata()->Excerpt, $readability->getExcerpt(), 'Parsed Excerpt does not match expected value.');
        $this->assertSame($testPage->getExpectedMetadata()->Image, $readability->getImage(), 'Parsed Image does not match expected value.');
        $this->assertSame($testPage->getExpectedMetadata()->Title, $readability->getTitle(), 'Parsed Title does not match expected value.');
    }

    /**
     * Test that Readability returns all the expected images from the test page.
     *
     * @param TestPage $testPage
     * @dataProvider getSamplePages
     *
     * @throws ParseException
     */
    public function testHTMLParserParsesImages(TestPage $testPage)
    {
        $options = ['OriginalURL' => 'http://fakehost/test/test.html',
            'fixRelativeURLs' => true,
            'substituteEntities' => true,
        ];

        $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));

        $readability = new Readability($configuration);
        $readability->parse($testPage->getSourceHTML());

        $this->assertSame($testPage->getExpectedImages(), $readability->getImages());
    }

    /**
     * Main data provider.
     *
     * Yields one TestPage per sub-directory of test-pages/, keyed by the
     * directory name. config.json is optional; the other fixture files are
     * read unconditionally.
     *
     * @return \Generator
     */
    public function getSamplePages()
    {
        $path = pathinfo(__FILE__, PATHINFO_DIRNAME) . DIRECTORY_SEPARATOR . 'test-pages';

        foreach (scandir($path) as $testPage) {
            $testCasePath = $path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR;

            // Skip the dot entries by name instead of assuming they are the
            // first two results (names starting with e.g. '-' sort before '.'),
            // and ignore stray files that are not test-case directories.
            if ($testPage === '.' || $testPage === '..' || !is_dir($testCasePath)) {
                continue;
            }

            $source = file_get_contents($testCasePath . 'source.html');
            $expectedHTML = file_get_contents($testCasePath . 'expected.html');
            $expectedImages = json_decode(file_get_contents($testCasePath . 'expected-images.json'), true);
            $expectedMetadata = json_decode(file_get_contents($testCasePath . 'expected-metadata.json'));
            $configuration = file_exists($testCasePath . 'config.json') ? json_decode(file_get_contents($testCasePath . 'config.json'), true) : [];

            yield $testPage => [new TestPage($configuration, $source, $expectedHTML, $expectedImages, $expectedMetadata)];
        }
    }

    /**
     * Test that Readability throws an exception with malformed HTML.
     *
     * @throws ParseException
     */
    public function testReadabilityThrowsExceptionWithMalformedHTML()
    {
        $parser = new Readability(new Configuration());
        $this->expectException(ParseException::class);
        $this->expectExceptionMessage('Invalid or incomplete HTML.');
        $parser->parse('<html>');
    }

    /**
     * Test that Readability throws an exception with incomplete or short HTML.
     *
     * @throws ParseException
     */
    public function testReadabilityThrowsExceptionWithUnparseableHTML()
    {
        $parser = new Readability(new Configuration());
        $this->expectException(ParseException::class);
        $this->expectExceptionMessage('Could not parse text.');
        $parser->parse('<html><body><p></p></body></html>');
    }

    /**
     * Test that the Readability object has no content as soon as it is instantiated.
     */
    public function testReadabilityCallGetContentWithNoContent()
    {
        $parser = new Readability(new Configuration());
        $this->assertNull($parser->getContent());
    }
}
|
@ -0,0 +1,61 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace andreskrey\Readability\Test;
|
||||||
|
|
||||||
|
/**
 * Immutable value holder for one sample page: the source document, the
 * per-page configuration and the results the parser is expected to produce.
 */
class TestPage
{
    /** @var array Configuration overrides for this page */
    private $configuration;

    /** @var string HTML handed to the parser */
    private $sourceHTML;

    /** @var string HTML the parser is expected to produce */
    private $expectedHTML;

    /** @var mixed Expected images, as decoded from the fixture */
    private $expectedImages;

    /** @var \stdClass Expected metadata, as decoded from the fixture */
    private $expectedMetadata;

    /**
     * Store the fixture values as given; nothing is validated or copied.
     *
     * @param array     $configuration
     * @param string    $sourceHTML
     * @param string    $expectedHTML
     * @param mixed     $expectedImages
     * @param \stdClass $expectedMetadata
     */
    public function __construct($configuration, $sourceHTML, $expectedHTML, $expectedImages, $expectedMetadata)
    {
        $this->expectedMetadata = $expectedMetadata;
        $this->expectedImages = $expectedImages;
        $this->expectedHTML = $expectedHTML;
        $this->sourceHTML = $sourceHTML;
        $this->configuration = $configuration;
    }

    /**
     * @return array
     */
    public function getConfiguration()
    {
        return $this->configuration;
    }

    /**
     * @return string
     */
    public function getSourceHTML()
    {
        return $this->sourceHTML;
    }

    /**
     * @return string
     */
    public function getExpectedHTML()
    {
        return $this->expectedHTML;
    }

    /**
     * @return mixed
     */
    public function getExpectedImages()
    {
        return $this->expectedImages;
    }

    /**
     * @return \stdClass
     */
    public function getExpectedMetadata()
    {
        return $this->expectedMetadata;
    }
}
|
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"ArticleByLine": true
|
||||||
|
}
|
@ -0,0 +1,3 @@
|
|||||||
|
[
|
||||||
|
"http:\/\/fakehost\/static\/code\/2013\/blanket-coverage.png"
|
||||||
|
]
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": "Nicolas Perriault —",
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Nicolas Perriault's homepage.",
|
||||||
|
"Image": null,
|
||||||
|
"Title": "Get your Frontend JavaScript Code Covered | Code",
|
||||||
|
"SiteName": null
|
||||||
|
}
|
@ -0,0 +1,128 @@
|
|||||||
|
<section><p><strong>So finally you're <a href="http://fakehost/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">testing your frontend JavaScript code</a>? Great! The more you
|
||||||
|
write tests, the more confident you are with your code… but how much precisely?
|
||||||
|
That's where <a href="http://en.wikipedia.org/wiki/Code_coverage">code coverage</a> might
|
||||||
|
help.</strong>
|
||||||
|
</p>
|
||||||
|
<p>The idea behind code coverage is to record which parts of your code (functions,
|
||||||
|
statements, conditionals and so on) have been executed by your test suite,
|
||||||
|
to compute metrics out of these data and usually to provide tools for navigating
|
||||||
|
and inspecting them.</p>
|
||||||
|
<p>Not a lot of frontend developers I know actually test their frontend code,
|
||||||
|
and I can barely imagine how many of them have ever setup code coverage…
|
||||||
|
Mostly because there are not many frontend-oriented tools in this area
|
||||||
|
I guess.</p>
|
||||||
|
<p>Actually I've only found one which provides an adapter for <a href="http://visionmedia.github.io/mocha/">Mocha</a> and
|
||||||
|
actually works…</p>
|
||||||
|
<blockquote>
|
||||||
|
<p>Drinking game for web devs:
|
||||||
|
<br></br>(1) Think of a noun
|
||||||
|
<br></br>(2) Google "<noun>.js"
|
||||||
|
<br></br>(3) If a library with that name exists - drink</p>— Shay Friedman (@ironshay)
|
||||||
|
<a href="https://twitter.com/ironshay/statuses/370525864523743232">August 22, 2013</a>
|
||||||
|
</blockquote>
|
||||||
|
<p><strong><a href="http://blanketjs.org/">Blanket.js</a></strong> is an <em>easy to install, easy to configure,
|
||||||
|
and easy to use JavaScript code coverage library that works both in-browser and
|
||||||
|
with nodejs.</em>
|
||||||
|
</p>
|
||||||
|
<p>Its use is dead easy, adding Blanket support to your Mocha test suite
|
||||||
|
is just matter of adding this simple line to your HTML test file:</p>
|
||||||
|
<pre><code><script src="vendor/blanket.js"
|
||||||
|
data-cover-adapter="vendor/mocha-blanket.js"></script>
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p>Source files: <a href="https://raw.github.com/alex-seville/blanket/master/dist/qunit/blanket.min.js">blanket.js</a>,
|
||||||
|
<a href="https://raw.github.com/alex-seville/blanket/master/src/adapters/mocha-blanket.js">mocha-blanket.js</a>
|
||||||
|
</p>
|
||||||
|
<p>As an example, let's reuse the silly <code>Cow</code> example we used
|
||||||
|
<a href="http://fakehost/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">in a previous episode</a>:</p>
|
||||||
|
<pre><code>// cow.js
|
||||||
|
(function(exports) {
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
function Cow(name) {
|
||||||
|
this.name = name || "Anon cow";
|
||||||
|
}
|
||||||
|
exports.Cow = Cow;
|
||||||
|
|
||||||
|
Cow.prototype = {
|
||||||
|
greets: function(target) {
|
||||||
|
if (!target)
|
||||||
|
throw new Error("missing target");
|
||||||
|
return this.name + " greets " + target;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
})(this);
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p>And its test suite, powered by Mocha and <a href="http://chaijs.com/">Chai</a>:</p>
|
||||||
|
<pre><code>var expect = chai.expect;
|
||||||
|
|
||||||
|
describe("Cow", function() {
|
||||||
|
describe("constructor", function() {
|
||||||
|
it("should have a default name", function() {
|
||||||
|
var cow = new Cow();
|
||||||
|
expect(cow.name).to.equal("Anon cow");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should set cow's name if provided", function() {
|
||||||
|
var cow = new Cow("Kate");
|
||||||
|
expect(cow.name).to.equal("Kate");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("#greets", function() {
|
||||||
|
it("should greet passed target", function() {
|
||||||
|
var greetings = (new Cow("Kate")).greets("Baby");
|
||||||
|
expect(greetings).to.equal("Kate greets Baby");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p>Let's create the HTML test file for it, featuring Blanket and its adapter
|
||||||
|
for Mocha:</p>
|
||||||
|
<pre><code><!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Test</title>
|
||||||
|
<link rel="stylesheet" media="all" href="vendor/mocha.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="mocha"></div>
|
||||||
|
<div id="messages"></div>
|
||||||
|
<div id="fixtures"></div>
|
||||||
|
<script src="vendor/mocha.js"></script>
|
||||||
|
<script src="vendor/chai.js"></script>
|
||||||
|
<script src="vendor/blanket.js"
|
||||||
|
data-cover-adapter="vendor/mocha-blanket.js"></script>
|
||||||
|
<script>mocha.setup('bdd');</script>
|
||||||
|
<script src="cow.js" data-cover></script>
|
||||||
|
<script src="cow_test.js"></script>
|
||||||
|
<script>mocha.run();</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p><strong>Notes</strong>:</p>
|
||||||
|
<ul><li>Notice the <code>data-cover</code> attribute we added to the script tag
|
||||||
|
loading the source of our library;</li>
|
||||||
|
<li>The HTML test file <em>must</em> be served over HTTP for the adapter to
|
||||||
|
be loaded.</li>
|
||||||
|
</ul><p>Running the tests now gives us something like this:</p>
|
||||||
|
<p>
|
||||||
|
<img alt="screenshot" src="http://fakehost/static/code/2013/blanket-coverage.png"></img></p>
|
||||||
|
<p>As you can see, the report at the bottom highlights that we haven't actually
|
||||||
|
tested the case where an error is raised in case a target name is missing.
|
||||||
|
We've been informed of that, nothing more, nothing less. We simply know
|
||||||
|
we're missing a test here. Isn't this cool? I think so!</p>
|
||||||
|
<p>Just remember that code coverage will only <a href="http://codebetter.com/karlseguin/2008/12/09/code-coverage-use-it-wisely/">bring you numbers</a> and
|
||||||
|
raw information, not actual proofs that the whole of your <em>code logic</em> has
|
||||||
|
been actually covered. If you ask me, the best inputs you can get about
|
||||||
|
your code logic and implementation ever are the ones issued out of <a href="http://www.extremeprogramming.org/rules/pair.html">pair programming</a>
|
||||||
|
sessions
|
||||||
|
and <a href="http://alexgaynor.net/2013/sep/26/effective-code-review/">code reviews</a> —
|
||||||
|
but that's another story.</p>
|
||||||
|
<p><strong>So is code coverage silver bullet? No. Is it useful? Definitely. Happy testing!</strong>
|
||||||
|
</p>
|
||||||
|
</section>
|
@ -0,0 +1,233 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html class="no-js" lang="en">
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/>
|
||||||
|
<title>Get your Frontend JavaScript Code Covered | Code | Nicolas Perriault</title>
|
||||||
|
<meta
|
||||||
|
name="description" content="Nicolas Perriault's homepage."/>
|
||||||
|
<meta name="viewport" content="width=device-width"/>
|
||||||
|
<link href="//fonts.googleapis.com/css?family=Asap:400,400italic,700,700italic&subset=latin,latin-ext"
|
||||||
|
rel="stylesheet" type="text/css"/>
|
||||||
|
<link rel="stylesheet" type="text/css" href="/static/packed.css?1412806084"/>
|
||||||
|
<link rel="alternate" type="application/rss+xml" href="/code/feed/" title="Code (RSS)"/>
|
||||||
|
<link rel="alternate" type="application/rss+xml" href="/photography/feed/"
|
||||||
|
title="Photography (RSS)"/>
|
||||||
|
<link rel="alternate" type="application/rss+xml" href="/talks/feed/" title="Talks (RSS)"/>
|
||||||
|
<link rel="alternate" type="application/rss+xml" href="/carnet/feed/"
|
||||||
|
title="Carnet (RSS)"/>
|
||||||
|
<link rel="alternate" type="application/rss+xml" href="/feed/" title="Everything (RSS)"/>
|
||||||
|
<!--[if lt IE 9]>
|
||||||
|
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
|
||||||
|
<![endif]-->
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body class="code " onload="prettyPrint()">
|
||||||
|
<!--[if lt IE 7]>
|
||||||
|
<p class="chromeframe">Your browser is <em>ancient!</em> Please <a href="http://www.quirksmode.org/upgrade.html">upgrade</a>.</p>
|
||||||
|
<![endif]-->
|
||||||
|
<div class="container">
|
||||||
|
<header class="main-title">
|
||||||
|
<h1><a href="/">Hi, I'm <strong>Nicolas.</strong></a></h1>
|
||||||
|
<small>I code stuff. I take photos. I write rants.</small>
|
||||||
|
|
||||||
|
</header>
|
||||||
|
<main class="contents" role="main">
|
||||||
|
<article lang="en" class="code" itemscope="" itemtype="http://schema.org/BlogPosting">
|
||||||
|
<link itemprop="url" href="/code/2013/get-your-frontend-javascript-code-covered/"/>
|
||||||
|
<header>
|
||||||
|
<h2><a itemprop="name" href="/code/2013/get-your-frontend-javascript-code-covered/">Get your Frontend JavaScript Code Covered</a></h2>
|
||||||
|
</header>
|
||||||
|
<section>
|
||||||
|
<p><strong>So finally you're <a href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">testing your frontend JavaScript code</a>? Great! The more you
|
||||||
|
write tests, the more confident you are with your code… but how much precisely?
|
||||||
|
That's where <a href="http://en.wikipedia.org/wiki/Code_coverage">code coverage</a> might
|
||||||
|
help.</strong>
|
||||||
|
</p>
|
||||||
|
<p>The idea behind code coverage is to record which parts of your code (functions,
|
||||||
|
statements, conditionals and so on) have been executed by your test suite,
|
||||||
|
to compute metrics out of these data and usually to provide tools for navigating
|
||||||
|
and inspecting them.</p>
|
||||||
|
<p>Not a lot of frontend developers I know actually test their frontend code,
|
||||||
|
and I can barely imagine how many of them have ever setup code coverage…
|
||||||
|
Mostly because there are not many frontend-oriented tools in this area
|
||||||
|
I guess.</p>
|
||||||
|
<p>Actually I've only found one which provides an adapter for <a href="http://visionmedia.github.io/mocha/">Mocha</a> and
|
||||||
|
actually works…</p>
|
||||||
|
<blockquote class="twitter-tweet tw-align-center">
|
||||||
|
<p>Drinking game for web devs:
|
||||||
|
<br />(1) Think of a noun
|
||||||
|
<br />(2) Google "<noun>.js"
|
||||||
|
<br />(3) If a library with that name exists - drink</p>— Shay Friedman (@ironshay)
|
||||||
|
<a
|
||||||
|
href="https://twitter.com/ironshay/statuses/370525864523743232">August 22, 2013</a>
|
||||||
|
</blockquote>
|
||||||
|
<p><strong><a href="http://blanketjs.org/">Blanket.js</a></strong> is an <em>easy to install, easy to configure,
|
||||||
|
and easy to use JavaScript code coverage library that works both in-browser and
|
||||||
|
with nodejs.</em>
|
||||||
|
</p>
|
||||||
|
<p>Its use is dead easy, adding Blanket support to your Mocha test suite
|
||||||
|
is just matter of adding this simple line to your HTML test file:</p>
|
||||||
|
<pre><code><script src="vendor/blanket.js"
|
||||||
|
data-cover-adapter="vendor/mocha-blanket.js"></script>
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p>Source files: <a href="https://raw.github.com/alex-seville/blanket/master/dist/qunit/blanket.min.js">blanket.js</a>,
|
||||||
|
<a
|
||||||
|
href="https://raw.github.com/alex-seville/blanket/master/src/adapters/mocha-blanket.js">mocha-blanket.js</a>
|
||||||
|
</p>
|
||||||
|
<p>As an example, let's reuse the silly <code>Cow</code> example we used
|
||||||
|
<a
|
||||||
|
href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">in a previous episode</a>:</p>
|
||||||
|
<pre><code>// cow.js
|
||||||
|
(function(exports) {
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
function Cow(name) {
|
||||||
|
this.name = name || "Anon cow";
|
||||||
|
}
|
||||||
|
exports.Cow = Cow;
|
||||||
|
|
||||||
|
Cow.prototype = {
|
||||||
|
greets: function(target) {
|
||||||
|
if (!target)
|
||||||
|
throw new Error("missing target");
|
||||||
|
return this.name + " greets " + target;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
})(this);
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p>And its test suite, powered by Mocha and <a href="http://chaijs.com/">Chai</a>:</p>
|
||||||
|
<pre><code>var expect = chai.expect;
|
||||||
|
|
||||||
|
describe("Cow", function() {
|
||||||
|
describe("constructor", function() {
|
||||||
|
it("should have a default name", function() {
|
||||||
|
var cow = new Cow();
|
||||||
|
expect(cow.name).to.equal("Anon cow");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should set cow's name if provided", function() {
|
||||||
|
var cow = new Cow("Kate");
|
||||||
|
expect(cow.name).to.equal("Kate");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("#greets", function() {
|
||||||
|
it("should greet passed target", function() {
|
||||||
|
var greetings = (new Cow("Kate")).greets("Baby");
|
||||||
|
expect(greetings).to.equal("Kate greets Baby");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p>Let's create the HTML test file for it, featuring Blanket and its adapter
|
||||||
|
for Mocha:</p>
|
||||||
|
<pre><code><!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Test</title>
|
||||||
|
<link rel="stylesheet" media="all" href="vendor/mocha.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="mocha"></div>
|
||||||
|
<div id="messages"></div>
|
||||||
|
<div id="fixtures"></div>
|
||||||
|
<script src="vendor/mocha.js"></script>
|
||||||
|
<script src="vendor/chai.js"></script>
|
||||||
|
<script src="vendor/blanket.js"
|
||||||
|
data-cover-adapter="vendor/mocha-blanket.js"></script>
|
||||||
|
<script>mocha.setup('bdd');</script>
|
||||||
|
<script src="cow.js" data-cover></script>
|
||||||
|
<script src="cow_test.js"></script>
|
||||||
|
<script>mocha.run();</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
</code></pre>
|
||||||
|
|
||||||
|
<p><strong>Notes</strong>:</p>
|
||||||
|
<ul>
|
||||||
|
<li>Notice the <code>data-cover</code> attribute we added to the script tag
|
||||||
|
loading the source of our library;</li>
|
||||||
|
<li>The HTML test file <em>must</em> be served over HTTP for the adapter to
|
||||||
|
be loaded.</li>
|
||||||
|
</ul>
|
||||||
|
<p>Running the tests now gives us something like this:</p>
|
||||||
|
<p>
|
||||||
|
<img alt="screenshot" src="/static/code/2013/blanket-coverage.png"/>
|
||||||
|
</p>
|
||||||
|
<p>As you can see, the report at the bottom highlights that we haven't actually
|
||||||
|
tested the case where an error is raised in case a target name is missing.
|
||||||
|
We've been informed of that, nothing more, nothing less. We simply know
|
||||||
|
we're missing a test here. Isn't this cool? I think so!</p>
|
||||||
|
<p>Just remember that code coverage will only <a href="http://codebetter.com/karlseguin/2008/12/09/code-coverage-use-it-wisely/">bring you numbers</a> and
|
||||||
|
raw information, not actual proofs that the whole of your <em>code logic</em> has
|
||||||
|
been actually covered. If you ask me, the best inputs you can get about
|
||||||
|
your code logic and implementation ever are the ones issued out of <a href="http://www.extremeprogramming.org/rules/pair.html">pair programming</a>
|
||||||
|
sessions
|
||||||
|
and <a href="http://alexgaynor.net/2013/sep/26/effective-code-review/">code reviews</a> —
|
||||||
|
but that's another story.</p>
|
||||||
|
<p><strong>So is code coverage silver bullet? No. Is it useful? Definitely. Happy testing!</strong>
|
||||||
|
</p>
|
||||||
|
</section>
|
||||||
|
<aside>
|
||||||
|
<p> <span class="article-author" itemprop="author" itemscope="" itemtype="http://schema.org/Person">
|
||||||
|
<span itemprop="name">Nicolas Perriault</span> —</span>
|
||||||
|
<time
|
||||||
|
datetime="2013-09-29" itemprop="datePublished">2013-09-29</time>— in <a href="/code/" itemprop="genre">Code</a>
|
||||||
|
— <a href="/code/2013/get-your-frontend-javascript-code-covered/">Permalink</a>
|
||||||
|
—
|
||||||
|
<a
|
||||||
|
rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/">License</a>— <a href="http://flattr.com/submit/auto?url=https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/&title=Get your Frontend JavaScript Code Covered&user_id=n1k0&category=software&language=en">flattr this</a>
|
||||||
|
|
||||||
|
</p>
|
||||||
|
</aside>
|
||||||
|
<hr/>
|
||||||
|
<nav> <a class="prev" href="/code/2013/functional-javascript-for-crawling-the-web/">Functional JavaScript for crawling the Web</a>
|
||||||
|
|
|
||||||
|
<a
|
||||||
|
class="next" href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">Testing your frontend JavaScript code using mocha, chai, and sinon</a>
|
||||||
|
</nav>
|
||||||
|
</article>
|
||||||
|
</main>
|
||||||
|
<nav class="sidebar">
|
||||||
|
<ul>
|
||||||
|
<li class="home"><a href="/" hreflang="en">Home</a>
|
||||||
|
</li>
|
||||||
|
<li class="code"><a href="/code/" hreflang="en">Code</a>
|
||||||
|
</li>
|
||||||
|
<li class="photography"><a href="/photography/" hreflang="en">Photography</a>
|
||||||
|
</li>
|
||||||
|
<li class="talks"><a href="/talks/" hreflang="en">Talks</a>
|
||||||
|
</li>
|
||||||
|
<li class="carnet"><a href="/carnet/" hreflang="fr">Carnet <span>fr</span></a>
|
||||||
|
</li>
|
||||||
|
<li class="contact"><a href="/contact/" hreflang="en">Contact</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
<footer class="site-footer">
|
||||||
|
<p>© 2012 Nicolas Perriault — <a href="https://twitter.com/n1k0">Tweet at me</a>
|
||||||
|
—
|
||||||
|
<a
|
||||||
|
href="https://github.com/n1k0">Get my code</a>— <a href="http://500px.com/n1k0">Enjoy my pics</a>
|
||||||
|
— <a href="/contact/">Contact me</a>
|
||||||
|
|
||||||
|
</p>
|
||||||
|
</footer>
|
||||||
|
</div>
|
||||||
|
<!-- /container -->
|
||||||
|
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>
|
||||||
|
<script>
|
||||||
|
window.jQuery || document.write('<script src="js/libs/jquery-1.7.1.min.js"><\/script>')
|
||||||
|
</script>
|
||||||
|
<script type="text/javascript" src="/static/js/libs/prettify/prettify.js"></script>
|
||||||
|
<script type="text/javascript" src="/static/js/app.js"></script>
|
||||||
|
<script src="//platform.twitter.com/widgets.js" charset="utf-8"></script>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
@ -0,0 +1 @@
|
|||||||
|
[]
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1 @@
|
|||||||
|
[]
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": "Dublin Core property author",
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Dublin Core property description",
|
||||||
|
"Image": null,
|
||||||
|
"Title": "Dublin Core property title",
|
||||||
|
"SiteName": null
|
||||||
|
}
|
@ -0,0 +1,18 @@
|
|||||||
|
<article>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article>
|
@ -0,0 +1,45 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<title>Title Element</title>
|
||||||
|
<meta name="title" content="Meta name title"/>
|
||||||
|
<meta name="og:title" content="Open Graph name title"/>
|
||||||
|
<meta name="twitter:title" content="Twitter name title"/>
|
||||||
|
<meta name="DC.title" content="Dublin Core name title"/>
|
||||||
|
<meta property="dc:title" content="Dublin Core property title"/>
|
||||||
|
<meta property="twitter:title" content="Twitter property title"/>
|
||||||
|
<meta property="og:title" content="Open Graph property title"/>
|
||||||
|
<meta name="author" content="Meta name author"/>
|
||||||
|
<meta name="DC.creator" content="Dublin Core name author"/>
|
||||||
|
<meta property="dc:creator" content="Dublin Core property author"/>
|
||||||
|
<meta name="description" content="Meta name description"/>
|
||||||
|
<meta name="og:description" content="Open Graph name description"/>
|
||||||
|
<meta name="twitter:description" content="Twitter name description"/>
|
||||||
|
<meta name="DC.description" content="Dublin Core name description"/>
|
||||||
|
<meta property="dc:description" content="Dublin Core property description"/>
|
||||||
|
<meta property="twitter:description" content="Twitter property description"/>
|
||||||
|
<meta property="og:description" content="Open Graph property description"/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Test document title</h1>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,3 @@
|
|||||||
|
[
|
||||||
|
"http:\/\/fakehost.com\/image.jpg"
|
||||||
|
]
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": "Creator Name",
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Preferred description",
|
||||||
|
"Image": "http:\/\/fakehost.com\/image.jpg",
|
||||||
|
"Title": "Preferred title",
|
||||||
|
"SiteName": null
|
||||||
|
}
|
@ -0,0 +1,18 @@
|
|||||||
|
<article>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article>
|
@ -0,0 +1,36 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<title>Title Element</title>
|
||||||
|
<meta property="x:title dc:title" content="Preferred title"/>
|
||||||
|
<meta property="og:title twitter:title" content="A title"/>
|
||||||
|
<meta property="dc:creator twitter:site_name" content="Creator Name"/>
|
||||||
|
<meta name="author" content="FAIL"/>
|
||||||
|
<meta property="og:description x:description twitter:description" content="A description"/>
|
||||||
|
<meta property="dc:description og:description" content="Preferred description"/>
|
||||||
|
<meta property="twitter:image og:image" content="http://fakehost.com/image.jpg"/>
|
||||||
|
<meta name="description" content="FAIL"/>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Test document title</h1>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"ArticleByLine": true
|
||||||
|
}
|
@ -0,0 +1,3 @@
|
|||||||
|
[
|
||||||
|
"https:\/\/www.aclu.org\/sites\/default\/files\/styles\/metatag_og_image_1200x630\/public\/field_share_image\/web18-facebook-socialshare-1200x628-v02.png?itok=p77cQjOm"
|
||||||
|
]
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"ArticleByLine": true
|
||||||
|
}
|
@ -0,0 +1,3 @@
|
|||||||
|
[
|
||||||
|
"http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-640x426.jpg"
|
||||||
|
]
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": "by Dan Goodin - Apr 16, 2015 8:02 pm UTC",
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Two-year-old bug exposes thousands of servers to crippling attack.",
|
||||||
|
"Image": "http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-640x426.jpg",
|
||||||
|
"Title": "Just-released Minecraft exploit makes it easy to crash game servers",
|
||||||
|
"SiteName": "Ars Technica"
|
||||||
|
}
|
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"0": "http:\/\/fakehost\/test\/base\/foo\/bar\/baz.png",
|
||||||
|
"2": "http:\/\/fakehost\/foo\/bar\/baz.png",
|
||||||
|
"3": "http:\/\/test\/foo\/bar\/baz.png",
|
||||||
|
"4": "https:\/\/test\/foo\/bar\/baz.png"
|
||||||
|
}
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": null,
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
|
||||||
|
"Image": null,
|
||||||
|
"Title": "Base URL with base relative test",
|
||||||
|
"SiteName": null
|
||||||
|
}
|
@ -0,0 +1,33 @@
|
|||||||
|
<article>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
<p>Links</p>
|
||||||
|
<p><a href="http://fakehost/test/base/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="http://fakehost/test/base/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="#foo">link</a></p>
|
||||||
|
<p><a href="http://fakehost/test/base/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="https://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p>Images</p>
|
||||||
|
<p><img src="http://fakehost/test/base/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://fakehost/test/base/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://test/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="https://test/foo/bar/baz.png"></img></p>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<p>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article>
|
@ -0,0 +1,44 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<base href="base/"/>
|
||||||
|
<title>Base URL with base relative test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<p>Links</p>
|
||||||
|
<p><a href="foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="./foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="#foo">link</a></p>
|
||||||
|
<p><a href="baz.html#foo">link</a></p>
|
||||||
|
<p><a href="/foo/bar/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="https://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p>Images</p>
|
||||||
|
<p><img src="foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="./foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="/foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="http://test/foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="https://test/foo/bar/baz.png"/></p>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"0": "http:\/\/fakehost\/foo\/bar\/baz.png",
|
||||||
|
"3": "http:\/\/test\/foo\/bar\/baz.png",
|
||||||
|
"4": "https:\/\/test\/foo\/bar\/baz.png"
|
||||||
|
}
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": null,
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
|
||||||
|
"Image": null,
|
||||||
|
"Title": "Base URL with base test",
|
||||||
|
"SiteName": null
|
||||||
|
}
|
@ -0,0 +1,33 @@
|
|||||||
|
<article>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
<p>Links</p>
|
||||||
|
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="#foo">link</a></p>
|
||||||
|
<p><a href="http://fakehost/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="https://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p>Images</p>
|
||||||
|
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://test/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="https://test/foo/bar/baz.png"></img></p>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<p>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article>
|
@ -0,0 +1,44 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<base href="/"/>
|
||||||
|
<title>Base URL with base test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<p>Links</p>
|
||||||
|
<p><a href="foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="./foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="#foo">link</a></p>
|
||||||
|
<p><a href="baz.html#foo">link</a></p>
|
||||||
|
<p><a href="/foo/bar/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="https://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p>Images</p>
|
||||||
|
<p><img src="foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="./foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="/foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="http://test/foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="https://test/foo/bar/baz.png"/></p>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"0": "http:\/\/fakehost\/test\/foo\/bar\/baz.png",
|
||||||
|
"2": "http:\/\/fakehost\/foo\/bar\/baz.png",
|
||||||
|
"3": "http:\/\/test\/foo\/bar\/baz.png",
|
||||||
|
"4": "https:\/\/test\/foo\/bar\/baz.png"
|
||||||
|
}
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": null,
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
|
||||||
|
"Image": null,
|
||||||
|
"Title": "Base URL test",
|
||||||
|
"SiteName": null
|
||||||
|
}
|
@ -0,0 +1,33 @@
|
|||||||
|
<article>
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
<p>Links</p>
|
||||||
|
<p><a href="http://fakehost/test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="http://fakehost/test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="#foo">link</a></p>
|
||||||
|
<p><a href="http://fakehost/test/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="https://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p>Images</p>
|
||||||
|
<p><img src="http://fakehost/test/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://fakehost/test/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://fakehost/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="http://test/foo/bar/baz.png"></img></p>
|
||||||
|
<p><img src="https://test/foo/bar/baz.png"></img></p>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<p>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article>
|
@ -0,0 +1,43 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<title>Base URL test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<p>Links</p>
|
||||||
|
<p><a href="foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="./foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="#foo">link</a></p>
|
||||||
|
<p><a href="baz.html#foo">link</a></p>
|
||||||
|
<p><a href="/foo/bar/baz.html#foo">link</a></p>
|
||||||
|
<p><a href="http://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p><a href="https://test/foo/bar/baz.html">link</a></p>
|
||||||
|
<p>Images</p>
|
||||||
|
<p><img src="foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="./foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="/foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="http://test/foo/bar/baz.png"/></p>
|
||||||
|
<p><img src="https://test/foo/bar/baz.png"/></p>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1 @@
|
|||||||
|
[]
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": null,
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua.",
|
||||||
|
"Image": null,
|
||||||
|
"Title": "Basic tag cleaning test",
|
||||||
|
"SiteName": null
|
||||||
|
}
|
@ -0,0 +1,18 @@
|
|||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p>Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div><div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
@ -0,0 +1,36 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<title>Basic tag cleaning test</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<div>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua.</p>
|
||||||
|
<p>Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<iframe src="about:blank">Iframe fallback test</iframe>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat.</p>
|
||||||
|
<object data="foo.swf" type="application/x-shockwave-flash" width="88" height="31">
|
||||||
|
<param movie="foo.swf" />
|
||||||
|
</object>
|
||||||
|
<embed src="foo.swf"/>
|
||||||
|
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"0": "http:\/\/ichef.bbci.co.uk\/news\/1024\/cpsprodpb\/3D8B\/production\/_84455751_84455749.jpg",
|
||||||
|
"1": "http:\/\/ichef.bbci.co.uk\/news\/555\/cpsprodpb\/462D\/production\/_84456971_gettyimages-167501087.jpg",
|
||||||
|
"2": "http:\/\/ichef.bbci.co.uk\/news\/555\/media\/images\/76020000\/jpg\/_76020974_line976.jpg",
|
||||||
|
"3": "http:\/\/ichef-1.bbci.co.uk\/news\/555\/cpsprodpb\/6D3D\/production\/_84456972_p072315al-0500.jpg",
|
||||||
|
"5": "http:\/\/ichef-1.bbci.co.uk\/news\/555\/cpsprodpb\/142FD\/production\/_84458628_shirtreuters.jpg"
|
||||||
|
}
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": null,
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "President Barack Obama tells the BBC his failure to pass \"common sense gun safety laws\" is the greatest frustration of his presidency.",
|
||||||
|
"Image": "http:\/\/ichef.bbci.co.uk\/news\/1024\/cpsprodpb\/3D8B\/production\/_84455751_84455749.jpg",
|
||||||
|
"Title": "Obama admits US gun laws are his 'biggest frustration' - BBC News",
|
||||||
|
"SiteName": "BBC News"
|
||||||
|
}
|
@ -0,0 +1,17 @@
|
|||||||
|
<div property="articleBody">
|
||||||
|
<p>President Barack Obama has admitted that his failure to pass "common sense gun safety laws" in the US is the greatest frustration of his presidency. </p><p>In an interview with the BBC, Mr Obama said it was "distressing" not to have made progress on the issue "even in the face of repeated mass killings".</p><p>He vowed to keep trying, but the BBC's North America editor Jon Sopel said the president did not sound very confident. </p><p>However, Mr Obama said race relations had improved during his presidency. </p><p>Hours after the interview, a gunman opened fire at a cinema in the US state of Louisiana, killing two people and injuring several others before shooting himself.</p><p>In a wide-ranging interview, President Obama also said:</p><ul><li>
|
||||||
|
<a href="http://www.bbc.co.uk/news/uk-politics-33647154">The UK must stay in the EU</a> to have influence on the world stage</li>
|
||||||
|
<li>He is confident the Iran nuclear deal will be passed by Congress </li>
|
||||||
|
<li>Syria needs a political solution in order to defeat the Islamic State group</li>
|
||||||
|
<li>He would speak "bluntly" against corruption <a href="http://www.bbc.co.uk/news/world-us-canada-33646563">and human rights violations in Kenya</a>
|
||||||
|
</li>
|
||||||
|
<li>He would defend his advocacy of gay rights following protests in Kenya</li>
|
||||||
|
<li>Despite racial tensions, the US is becoming more diverse and more tolerant</li>
|
||||||
|
</ul><p><a href="http://www.bbc.co.uk/news/world-us-canada-33646542">Read the full transcript of his interview</a></p><p>Mr Obama lands in Kenya later on Friday for his first visit since becoming president. </p><p>But with just 18 months left in power, he said gun control was the area where he has been "most frustrated and most stymied" since coming to power in 2009.</p><p>"If you look at the number of Americans killed since 9/11 by terrorism, it's less than 100. If you look at the number that have been killed by gun violence, it's in the tens of thousands," Mr Obama said. </p><figure><img alt="Gun control campaigners protest in McPhearson Square in Washington DC - 25 April 2013" datasrc="http://ichef.bbci.co.uk/news/976/cpsprodpb/462D/production/_84456971_gettyimages-167501087.jpg" height="549" src="http://ichef.bbci.co.uk/news/555/cpsprodpb/462D/production/_84456971_gettyimages-167501087.jpg" width="976"></img><figcaption><span>
|
||||||
|
The president said he would continue fighting for greater gun control laws
|
||||||
|
</span>
|
||||||
|
</figcaption></figure><p>"For us not to be able to resolve that issue has been something that is distressing," he added. </p><p>Mr Obama has pushed for stricter gun control throughout his presidency but has been unable to secure any significant changes to the laws. </p><p>After nine African-American churchgoers were killed in South Carolina in June, he admitted "politics in this town" meant there were few options available.</p><figure><img alt="line" datasrc="http://ichef.bbci.co.uk/news/464/media/images/76020000/jpg/_76020974_line976.jpg" height="2" src="http://ichef.bbci.co.uk/news/555/media/images/76020000/jpg/_76020974_line976.jpg" width="464"></img></figure><h2>Analysis: Jon Sopel, BBC News, Washington</h2><figure><img alt="President Barack Obama participates in an interview with Jon Sopel of BBC in the Roosevelt Room of the White House - 23 July 2015" datasrc="http://ichef-1.bbci.co.uk/news/976/cpsprodpb/6D3D/production/_84456972_p072315al-0500.jpg" height="549" src="http://ichef-1.bbci.co.uk/news/555/cpsprodpb/6D3D/production/_84456972_p072315al-0500.jpg" width="976"></img></figure><p>Nine months ago, the president seemed like a spent force, after taking a beating in the midterm elections, during which members of his own party were reluctant to campaign on his record. </p><p>But the man sat before me today was relaxed and confident, buoyed by a string of "wins" on healthcare, Cuba and Iran, after bitter and ongoing battles with his many critics. </p><p>The only body swerve the president performed was when I asked him <a href="http://www.bbc.co.uk/news/world-us-canada-33643168"> how many minds he had changed on the Iran nuclear deal </a>after an intense sell aimed at Gulf allies and members of US Congress who remain implacably opposed. </p><p>There was a momentary flicker across the president's face as if to say "You think you got me?" 
before his smile returned and he proceeded to talk about how Congress would come round.</p><p>But notably, he did not give a direct answer to that question, which leaves me with the impression that he has persuaded precisely zero.</p><p><a href="http://www.bbc.co.uk/news/world-us-canada-33646875">Five things we learned from Obama interview</a></p><p><a href="http://www.bbc.co.uk/news/world-us-canada-33646545">The presidential body swerve</a></p><figure><img alt="line" datasrc="http://ichef.bbci.co.uk/news/464/media/images/76020000/jpg/_76020974_line976.jpg" height="2" src="http://ichef.bbci.co.uk/news/555/media/images/76020000/jpg/_76020974_line976.jpg" width="464"></img></figure><p>On race relations, Mr Obama said recent concerns around policing and mass incarcerations were "legitimate and deserve intense attention" but insisted progress had been made. </p><p>Children growing up during the eight years of his presidency "will have a different view of race relations in this country and what's possible," he said. </p><p>"There are going to be tensions that arise. But if you look at my daughters' generation, they have an attitude about race that's entirely different than even my generation."</p><p>Talking about how he was feeling after his recent successes, he said "every president, every leader has strengths and weaknesses". </p><p>"One of my strengths is I have a pretty even temperament. I don't get too high when it's high and I don't get too low when it's low," he said. </p><figure><img alt="Customer looks at Obama shirts at a stall in Nairobi's Kibera slums, 23 July 2015" datasrc="http://ichef-1.bbci.co.uk/news/976/cpsprodpb/142FD/production/_84458628_shirtreuters.jpg" height="549" src="http://ichef-1.bbci.co.uk/news/555/cpsprodpb/142FD/production/_84458628_shirtreuters.jpg" width="976"></img><figcaption><span>
|
||||||
|
Kenya is getting ready to welcome the US president
|
||||||
|
</span>
|
||||||
|
</figcaption></figure><h2>Kenya trip</h2><p>Mr Obama was speaking to the BBC at the White House before departing for Kenya.</p><p>His father was Kenyan and the president is expected to meet relatives in Nairobi.</p><p>Mr Obama has faced criticism in the country after the US legalised gay marriage. However, in his interview, the president said he would not fall silent on the issue.</p><p>"I am not a fan of discrimination and bullying of anybody on the basis of race, on the basis of religion, on the basis of sexual orientation or gender," he said.</p><p>The president also admitted that some African governments, including Kenya's, needed to improve their records on human rights and democracy. However, he defended his decision to engage with and visit those governments. </p><p>"Well, they're not ideal institutions. But what we found is, is that when we combined blunt talk with engagement, that gives us the best opportunity to influence and open up space for civil society." </p><p>Mr Obama will become the first US president to address the African Union when he travels on to Ethiopia on Sunday.</p>
|
||||||
|
</div>
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,6 @@
|
|||||||
|
[
|
||||||
|
"https:\/\/1.bp.blogspot.com\/-YIPC5jkXkDE\/Vy7YPSqFKWI\/AAAAAAAAAxI\/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB\/w1200-h630-p-nu\/block-diagram.png",
|
||||||
|
"https:\/\/1.bp.blogspot.com\/-YIPC5jkXkDE\/Vy7YPSqFKWI\/AAAAAAAAAxI\/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB\/s640\/block-diagram.png",
|
||||||
|
"https:\/\/1.bp.blogspot.com\/-k3naUT3uXao\/Vy7WFac246I\/AAAAAAAAAw8\/mePy_ostO8QJra5ZJrbP2WGhTlJ0B_r8gCLcB\/s640\/schematic-from-hell.png",
|
||||||
|
"https:\/\/2.bp.blogspot.com\/-kIekczO693g\/Vy7dBqYifXI\/AAAAAAAAAxc\/hMNJBs5bedIQOrBzzkhq4gbmhR-n58EQwCLcB\/s400\/graph-labels.png"
|
||||||
|
]
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": null,
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "I've written a couple of posts in the past few months but they were all for the blog at work so I figured I'm long overdue for one on Silic...",
|
||||||
|
"Image": "https:\/\/1.bp.blogspot.com\/-YIPC5jkXkDE\/Vy7YPSqFKWI\/AAAAAAAAAxI\/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB\/w1200-h630-p-nu\/block-diagram.png",
|
||||||
|
"Title": "Open Verilog flow for Silego GreenPak4 programmable logic devices",
|
||||||
|
"SiteName": null
|
||||||
|
}
|
@ -0,0 +1,57 @@
|
|||||||
|
<div id="post-body-932306423056216142" itemprop="description articleBody"><p>
|
||||||
|
I've written a couple of posts in the past few months but they were all for <a href="http://blog.ioactive.com/search/label/Andrew%20Zonenberg">the blog at work</a> so I figured I'm long overdue for one on Silicon Exposed.</p><h2>
|
||||||
|
So what's a GreenPak?</h2>
|
||||||
|
<p> Silego Technology is a fabless semiconductor company located in the SF Bay area, which makes (among other things) a line of programmable logic devices known as GreenPak. Their <a href="http://www.silego.com/products/greenpak5.html">5th generation parts</a> were just announced, but I started this project before that happened so I'm still targeting the <a href="http://www.silego.com/products/greenpak4.html">4th generation</a>.</p><p> GreenPak devices are kind of like itty bitty <a href="http://www.cypress.com/products/32-bit-arm-cortex-m-psoc">PSoCs</a> - they have a mixed signal fabric with an ADC, DACs, comparators, voltage references, plus a digital LUT/FF fabric and some typical digital MCU peripherals like counters and oscillators (but no CPU).</p><p> It's actually an interesting architecture - FPGAs (including some devices marketed as CPLDs) are a 2D array of LUTs connected via wires to adjacent cells, and true (product term) CPLDs are a star topology of AND-OR arrays connected by a crossbar. GreenPak, on the other hand, is a star topology of LUTs, flipflops, and analog/digital hard IP connected to a crossbar.</p><p> Without further ado, here's a block diagram showing all the cool stuff you get in the SLG46620V:</p><table readabilityDataTable="0"><tbody><tr><td>
|
||||||
|
<a href="https://1.bp.blogspot.com/-YIPC5jkXkDE/Vy7YPSqFKWI/AAAAAAAAAxI/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB/s1600/block-diagram.png" imageanchor="1"><img height="512" src="https://1.bp.blogspot.com/-YIPC5jkXkDE/Vy7YPSqFKWI/AAAAAAAAAxI/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB/s640/block-diagram.png" width="640"></img></a>
|
||||||
|
</td>
|
||||||
|
</tr><tr><td>SLG46620V block diagram (from device datasheet)</td>
|
||||||
|
</tr></tbody></table><p>
|
||||||
|
They're also tiny (the SLG46620V is a 20-pin 0.4mm pitch STQFN measuring 2x3 mm, and the lower gate count SLG46140V is a mere 1.6x2 mm) and probably the cheapest programmable logic device on the market - $0.50 in low volume and less than $0.40 in larger quantities.</p><p> The Vdd range of GreenPak4 is huge, more like what you'd expect from an MCU than an FPGA! It can run on anything from 1.8 to 5V, although performance is only specified at 1.8, 3.3, and 5V nominal voltages. There's also a dual-rail version that trades one of the GPIO pins for a second power supply pin, allowing you to interface to logic at two different voltage levels.</p><p> To support low-cost/space-constrained applications, they even have the configuration memory on die. It's one-time programmable and needs external Vpp to program (presumably Silego didn't want to waste die area on charge pumps that would only be used once) but has a SRAM programming mode for prototyping.</p><p> The best part is that the development software (GreenPak Designer) is free of charge and provided for all major operating systems including Linux! Unfortunately, the only supported design entry method is schematic entry and there's no way to write your design in a HDL.</p><p> While schematics may be fine for quick tinkering on really simple designs, they quickly get unwieldy. The nightmare of a circuit shown below is just a bunch of counters hooked up to LEDs that blink at various rates.</p><table readabilityDataTable="0"><tbody><tr><td>
|
||||||
|
<a href="https://1.bp.blogspot.com/-k3naUT3uXao/Vy7WFac246I/AAAAAAAAAw8/mePy_ostO8QJra5ZJrbP2WGhTlJ0B_r8gCLcB/s1600/schematic-from-hell.png" imageanchor="1"><img height="334" src="https://1.bp.blogspot.com/-k3naUT3uXao/Vy7WFac246I/AAAAAAAAAw8/mePy_ostO8QJra5ZJrbP2WGhTlJ0B_r8gCLcB/s640/schematic-from-hell.png" width="640"></img></a>
|
||||||
|
</td>
|
||||||
|
</tr><tr><td>Schematic from hell!</td>
|
||||||
|
</tr></tbody></table><p>
|
||||||
|
As if this wasn't enough of a problem, the largest GreenPak4 device (the SLG46620V) is split into two halves with limited routing between them, and the GUI doesn't help the user manage this complexity at all - you have to draw your schematic in two halves and add "cross connections" between them.</p><p> The icing on the cake is that schematics are a pain to diff and collaborate on. Although GreenPak schematics are XML based, which is a touch better than binary, who wants to read a giant XML diff and try to figure out what's going on in the circuit?</p><p> This isn't going to be a post on the quirks of Silego's software, though - that would be boring. As it turns out, there's one more exciting feature of these chips that I didn't mention earlier: the configuration bitstream is 100% documented in the device datasheet! This is unheard of in the programmable logic world. As Nick of Arachnid Labs <a href="http://www.arachnidlabs.com/blog/2015/03/30/greenpak/">says</a>, the chip is "just dying for someone to write a VHDL or Verilog compiler for it". As you can probably guess by from the title of this post, I've been busy doing exactly that.</p><h2>
|
||||||
|
Great! How does it work?</h2>
|
||||||
|
<p> Rather than wasting time writing a synthesizer, I decided to write a GreenPak technology library for Clifford Wolf's excellent open source synthesis tool, <a href="http://www.clifford.at/yosys/">Yosys</a>, and then make a place-and-route tool to turn that into a final netlist. The post-PAR netlist can then be loaded into GreenPak Designer in order to program the device.</p><p> The first step of the process is to run the "synth_greenpak4" Yosys flow on the Verilog source. This runs a generic RTL synthesis pass, then some coarse-grained extraction passes to infer shift register and counter cells from behavioral logic, and finally maps the remaining logic to LUT/FF cells and outputs a JSON-formatted netlist.</p><p> Once the design has been synthesized, my tool (named, surprisingly, gp4par) is then launched on the netlist. It begins by parsing the JSON and constructing a directed graph of cell objects in memory. A second graph, containing all of the primitives in the device and the legal connections between them, is then created based on the device specified on the command line. (As of now only the SLG46620V is supported; the SLG46621V can be added fairly easily but the SLG46140V has a slightly different microarchitecture which will require a bit more work to support.)</p><p> After the graphs are generated, each node in the netlist graph is assigned a numeric label identifying the type of cell and each node in the device graph is assigned a list of legal labels: for example, an I/O buffer site is legal for an input buffer, output buffer, or bidirectional buffer.</p><table readabilityDataTable="0"><tbody><tr><td>
|
||||||
|
<a href="https://2.bp.blogspot.com/-kIekczO693g/Vy7dBqYifXI/AAAAAAAAAxc/hMNJBs5bedIQOrBzzkhq4gbmhR-n58EQwCLcB/s1600/graph-labels.png" imageanchor="1"><img height="141" src="https://2.bp.blogspot.com/-kIekczO693g/Vy7dBqYifXI/AAAAAAAAAxc/hMNJBs5bedIQOrBzzkhq4gbmhR-n58EQwCLcB/s400/graph-labels.png" width="400"></img></a>
|
||||||
|
</td>
|
||||||
|
</tr><tr><td>Example labeling for a subset of the netlist and device graphs</td>
|
||||||
|
</tr></tbody></table><p>
|
||||||
|
The labeled nodes now need to be placed. The initial placement uses a simple greedy algorithm to create a valid (although not necessarily optimal or even routable) placement:</p><ol><li>Loop over the cells in the netlist. If any cell has a LOC constraint, which locks the cell to a specific physical site, attempt to assign the node to the specified site. If the specified node is the wrong type, doesn't exist, or is already used by another constrained node, the constraint is invalid so fail with an error.</li>
|
||||||
|
<li>Loop over all of the unconstrained cells in the netlist and assign them to the first unused site with the right label. If none are available, the design is too big for the device so fail with an error.</li>
|
||||||
|
</ol><p>
|
||||||
|
Once the design is placed, the placement optimizer then loops over the design and attempts to improve it. A simulated annealing algorithm is used, where changes to the design are accepted unconditionally if they make the placement better, and with a random, gradually decreasing probability if they make it worse. The optimizer terminates when the design receives a perfect score (indicating an optimal placement) or if it stops making progress for several iterations. Each iteration does the following:</p><ol><li>Compute a score for the current design based on the number of unroutable nets, the amount of routing congestion (number of nets crossing between halves of the device), and static timing analysis (not yet implemented, always zero).</li>
|
||||||
|
<li>Make a list of nodes that contributed to this score in some way (having some attached nets unroutable, crossing to the other half of the device, or failing timing).</li>
|
||||||
|
<li>Remove nodes from the list that are LOC'd to a specific location since we're not allowed to move them.</li>
|
||||||
|
<li>Remove nodes from the list that have only one legal placement in the device (for example, oscillator hard IP) since there's nowhere else for them to go.</li>
|
||||||
|
<li>Pick a node from the remainder of the list at random. Call this our pivot.</li>
|
||||||
|
<li>Find a list of candidate placements for the pivot: </li>
|
||||||
|
<ol><li>Consider all routable placements in the other half of the device.</li>
|
||||||
|
<li>If none were found, consider all routable placements anywhere in the device.</li>
|
||||||
|
<li>If none were found, consider all placements anywhere in the device even if they're not routable.</li>
|
||||||
|
</ol><li>Pick one of the candidates at random and move the pivot to that location. If another cell in the netlist is already there, put it in the vacant site left by the pivot.</li>
|
||||||
|
<li>Re-compute the score for the design. If it's better, accept this change and start the next iteration.</li>
|
||||||
|
<li>If the score is worse, accept it with a random probability which decreases as the iteration number goes up. If the change is not accepted, restore the previous placement.</li>
|
||||||
|
</ol><p>
|
||||||
|
After optimization, the design is checked for routability. If any edges in the netlist graph don't correspond to edges in the device graph, the user probably asked for something impossible (for example, trying to hook a flipflop's output to a comparator's reference voltage input) so fail with an error.</p><p> The design is then routed. This is quite simple due to the crossbar structure of the device. For each edge in the netlist:</p><ol><li>If dedicated (non-fabric) routing is used for this path, configure the destination's input mux appropriately and stop.</li>
|
||||||
|
<li>If the source and destination are in the same half of the device, configure the destination's input mux appropriately and stop.</li>
|
||||||
|
<li>A cross-connection must be used. Check if we already used one to bring the source signal to the other half of the device. If found, configure the destination to route from that cross-connection and stop.</li>
|
||||||
|
<li>Check if we have any cross-connections left going in this direction. If they're all used, the design is unroutable due to congestion so fail with an error.</li>
|
||||||
|
<li>Pick the next unused cross-connection and configure it to route from the source. Configure the destination to route from the cross-connection and stop.</li>
|
||||||
|
</ol><p>
|
||||||
|
Once routing is finished, run a series of post-PAR design rule checks. These currently include the following:</p><ul><li>If any node has no loads, generate a warning</li>
|
||||||
|
<li>If an I/O buffer is connected to analog hard IP, fail with an error if it's not configured in analog mode.</li>
|
||||||
|
<li>Some signals (such as comparator inputs and oscillator power-down controls) are generated by a shared mux and fed to many loads. If different loads require conflicting settings for the shared mux, fail with an error.</li>
|
||||||
|
</ul><p>
|
||||||
|
If DRC passes with no errors, configure all of the individual cells in the netlist based on the HDL parameters. Fail with an error if an invalid configuration was requested.</p><p> Finally, generate the bitstream from all of the per-cell configuration and write it to a file.</p><h2>
|
||||||
|
Great, let's get started!</h2><p>
|
||||||
|
If you don't already have one, you'll need to buy a <a href="http://www.silego.com/buy/index.php?main_page=product_info&products_id=388">GreenPak4 development kit</a>. The kit includes samples of the SLG46620V (among other devices) and a programmer/emulation board. While you're waiting for it to arrive, install <a href="http://www.silego.com/softdoc/software.html">GreenPak Designer</a>.</p><p> Download and install Yosys. Although Clifford is pretty good at merging my pull requests, only <a href="https://github.com/azonenberg/yosys/">my fork on Github</a> is guaranteed to have the most up-to-date support for GreenPak devices so don't be surprised if you can't use a bleeding-edge feature with mainline Yosys.</p><p> Download and install gp4par. You can get it from <a href="https://github.com/azonenberg/openfpga/">the Github repository</a>.</p><p> Write your HDL, compile with Yosys, P&R with gp4par, and import the bitstream into GreenPak Designer to program the target device. The most current gp4par manual is included in LaTeX source form in the source tree and is automatically built as part of the compile process. If you're just browsing, there's a <a href="http://thanatos.virtual.antikernel.net/unlisted/gp4-hdl.pdf">relatively recent PDF version</a> on my web server.</p><p> If you'd like to see the Verilog that produced the nightmare of a schematic I showed above, <a href="https://github.com/azonenberg/openfpga/blob/master/tests/greenpak4/Blinky/Blinky.v">here it is</a>.</p><p> Be advised that this project is still very much a work in progress and there are still a number of SLG46620V features I don't support (see the manual for exact details).</p><h2>
|
||||||
|
I love it / it segfaulted / there's a problem in the manual!</h2><p>
|
||||||
|
Hop in our IRC channel (##openfpga on Freenode) and let me know. Feedback is great, pull requests are even better,</p><h2>
|
||||||
|
You're competing with Silego's IDE. Have they found out and sued you yet?</h2><p>
|
||||||
|
Nope. They're fully aware of what I'm doing and are rolling out the red carpet for me. They love the idea of a HDL flow as an alternative to schematic entry and are pretty amazed at how fast it's coming together.</p><p> After I reported a few bugs in their datasheets they decided to skip the middleman and give me direct access to the engineer who writes their documentation so that I can get faster responses. The last time I found a problem (two different parts of the datasheet contradicted each other) an updated datasheet was in my inbox and on their website by the next day. I only wish Xilinx gave me that kind of treatment!</p><p> They've even <a href="https://twitter.com/SilegoTech/status/717018987771469824">offered me free hardware</a> to help me add support for their latest product family, although I plan to get GreenPak4 support to a more stable state before taking them up on the offer.</p><h2>
|
||||||
|
So what's next?</h2>
|
||||||
|
<p> Better testing, for starters. I have to verify functionality by hand with a DMM and oscilloscope, which is time consuming.</p><p> My contact at Silego says they're going to be giving me documentation on the SRAM emulation interface soon, so I'm going to make a hardware-in-loop test platform that connects to my desktop and the Silego ZIF socket, and lets me load new bitstreams via a scriptable interface. It'll have FPGA-based digital I/O as well as an ADC and DAC on every device pin, plus an adjustable voltage regulator for power, so I can feed in arbitrary mixed-signal test waveforms and write PC-based unit tests to verify correct behavior.</p><p> Other than that, I want to finish support for the SLG46620V in the next month or two. The SLG46621V will be an easy addition since only one pin and the relevant configuration bits have changed from the 46620 (I suspect they're the same die, just bonded out differently).</p><p> Once that's done I'll have to do some more extensive work to add the SLG46140V since the architecture is a bit different (a lot of the combinatorial logic is merged into multi-function blocks). Luckily, the 46140 has a lot in common architecturally with the GreenPak5 family, so once that's done GreenPak5 will probably be a lot easier to add support for.</p><p> My thanks go out to Clifford Wolf, whitequark, the IRC users in ##openfpga, and everyone at Silego I've worked with to help make this possible. I hope that one day this project will become mature enough that Silego will ship it as an officially supported extension to GreenPak Designer, making history by becoming the first modern programmable logic vendor to ship a fully open source synthesis and P&R suite.
|
||||||
|
</p>
|
||||||
|
</div>
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"ArticleByLine": true
|
||||||
|
}
|
@ -0,0 +1,4 @@
|
|||||||
|
[
|
||||||
|
"http:\/\/media.breitbart.com\/media\/2016\/11\/GettyImages-621866810.jpg",
|
||||||
|
"http:\/\/media.breitbart.com\/media\/2016\/11\/GettyImages-621866810-640x480.jpg"
|
||||||
|
]
|
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"Author": "by Lucas Nolan22 Dec 2016651",
|
||||||
|
"Direction": null,
|
||||||
|
"Excerpt": "Snopes fact checker and staff writer David Emery posted to Twitter asking if there were “any un-angry Trump supporters?”",
|
||||||
|
"Image": "http:\/\/media.breitbart.com\/media\/2016\/11\/GettyImages-621866810.jpg",
|
||||||
|
"Title": "'Neutral' Snopes Fact-Checker David Emery: 'Are There Any Un-Angry Trump Supporters?' - Breitbart",
|
||||||
|
"SiteName": "Breitbart"
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,9 @@
|
|||||||
|
[
|
||||||
|
"https:\/\/static.independent.co.uk\/s3fs-public\/thumbnails\/image\/2015\/12\/06\/10\/bed-hotel-room.jpg",
|
||||||
|
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2014\/03\/18\/10\/bandb2.jpg",
|
||||||
|
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2015\/05\/26\/11\/hotel-door-getty.jpg",
|
||||||
|
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2013\/07\/31\/15\/luggage-3.jpg",
|
||||||
|
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2015\/04\/13\/11\/Lifestyle-hotels.jpg",
|
||||||
|
"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2014\/03\/13\/16\/agenda7.jpg",
|
||||||
|
"http:\/\/fakehost\/sites\/all\/themes\/ines_themes\/independent_theme\/img\/reuse.png"
|
||||||
|
]
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,5 @@
|
|||||||
|
[
|
||||||
|
"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/22\/5\/campaign_images\/webdr03\/student-dies-after-diet-pills-she-bought-online-b-2-28712-1429696299-24_dblbig.jpg",
|
||||||
|
"http:\/\/ak-hdl.buzzfed.com\/static\/2015-04\/21\/5\/enhanced\/webdr12\/grid-cell-2501-1429608056-15.jpg",
|
||||||
|
"http:\/\/ak-hdl.buzzfed.com\/static\/2015-04\/21\/5\/enhanced\/webdr12\/grid-cell-2501-1429608057-18.jpg"
|
||||||
|
]
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue