# Request headers: a desktop Firefox (Windows 10) user agent and a wsj.com referer.
http_header(User-agent): Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0
http_header(referer): https://www.wsj.com

## Use AMP version
# Each single_page_link rebuilds 'https://www.wsj.com/amp/articles/<slug>' from
# the canonical link by dropping the host and N leading path segments. The
# if_page_contains after it guards the rule: it requires that what is left of
# the canonical href after dropping those same N segments contains no further
# '/'. The guard must therefore use the same number of substring-after() calls
# as the link expression it belongs to.

# Canonical path with one leading segment before the slug, e.g. /articles/<slug>
single_page_link: concat('https://www.wsj.com/amp/articles/', substring-after(substring-after(//link[@rel="canonical"]/@href, 'https://www.wsj.com/'), '/'))
if_page_contains: //link[@rel="canonical" and not( contains(substring-after(substring-after(@href, 'https://www.wsj.com/'), '/'), '/') )]

# Canonical path with two leading segments before the slug
single_page_link: concat('https://www.wsj.com/amp/articles/', substring-after(substring-after(substring-after(//link[@rel="canonical"]/@href, 'https://www.wsj.com/'), '/'), '/'))
if_page_contains: //link[@rel="canonical" and not( contains(substring-after(substring-after(substring-after(@href, 'https://www.wsj.com/'), '/'), '/'), '/') )]

# Canonical path with three leading segments before the slug.
# The guard previously duplicated the two-segment guard above, so this rule
# never matched the URLs it was written for.
single_page_link: concat('https://www.wsj.com/amp/articles/', substring-after(substring-after(substring-after(substring-after(//link[@rel="canonical"]/@href, 'https://www.wsj.com/'), '/'), '/'), '/'))
if_page_contains: //link[@rel="canonical" and not( contains(substring-after(substring-after(substring-after(substring-after(@href, 'https://www.wsj.com/'), '/'), '/'), '/'), '/') )]

# Primary content containers: the <article> element and the <main> element
# (presumably tried in the order listed; confirm against the extractor).
body: //article
body: //main

## needed for non-amp-version and wallabagger with wsj-subscription
# Page chrome removed by id/class name.
strip_id_or_class: article-body-tools
strip_id_or_class: print-header
strip_id_or_class: controls-container
strip_id_or_class: category
# Containers removed by XPath: the parent of the ribbon container, authoring
# blocks, newsletter insets and the article audio player.
strip: //div[contains(@class, 'ribbon-container')]/parent::div
strip: //div[contains(@class, 'AuthoringContainer')]
strip: //div[@data-inset_type='newsletterinset']
strip: //div[@class='audioplayer' and @id='articlereader']
# <source> is a child element of <picture>, not an attribute, so the former
# '//picture/@source' selected nothing. Removing the <source> children
# presumably leaves the <img> fallback as the only image candidate.
strip: //picture/source
strip: //button
strip: //nav
# Remove the siblings that follow a last <section> inside the article.
strip: //main/div/article//section[last()]/following-sibling::*

# Byline from links whose class contains 'AuthorLink'.
author: //a[contains(@class, 'AuthorLink')]

# Lead/body blocks inside <main id="main">, matched by itemprop
# (articleLead / articleBody) or by an exact 'articleBody' class token.
body: //main[@id="main"]//div[@itemprop="articleLead" or @itemprop="articleBody" or contains(concat(' ',normalize-space(@class),' '),' articleBody ')]

# Title from the og:title meta tag.
title: //meta[@property="og:title"]/@content
# Wrapper div with id 'wsj-article-wrap'.
body: //div[@id='wsj-article-wrap']
# is this still used?
body: //div[@id='article_story_body']

# Author from the 'author' meta tag.
author: //meta[@name="author"]/@content
# for slide show content
# (images of the 'firstSlide' item in ul#imageSlide, plus the first <p> found
# under a div whose class is exactly 'txt_body')
body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
# Date from the meta tag with itemprop 'dateCreated'.
date: //meta[@itemprop="dateCreated"]/@content

# Insets, bylines, ads and video insets removed by id/class name.
strip_id_or_class: insetFullBracket
strip_id_or_class: newsletter-inset
strip_id_or_class: insettipBox
#strip_id_or_class: legacyInset
strip_id_or_class: recipeACShopAndBuyText
strip_id_or_class: article__byline
strip_id_or_class: type-InsetMediaVideo
strip_id_or_class: wsj-ad
strip_id_or_class: bylineWrap
# AMP iframes and any element carrying the amp-access-hide attribute.
strip: //amp-iframe
strip: //*[@amp-access-hide]

# <cite> elements inside inset content, elements styled 'visibility: hidden;',
# inset content whose class does not contain 'image', carousels, wrappers of
# rich-text media objects that hold an <h4> and a ul.articleList, snippets and
# video media objects.
strip: //div[contains(@class, 'insetContent')]//cite
strip: //*[contains(@style, 'visibility: hidden;')]
strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))]
strip: //div[contains(@class, 'carousel')]
strip: //div[div[contains(@class, 'media-object-rich-text') and h4 and ul[@class="articleList"]]]
strip: //div[contains(@class, 'snippet')]
strip: //div[contains(@class, 'media-object-video')]

# Disabled alternative request headers (a "Twitter for iPhone" user agent with a t.co referer).
# see https://elaineou.com/2017/01/19/how-the-twitter-app-bypasses-paywalls/
#http_header(user-agent): Mozilla/5.0 (iPhone; CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.1.32 (KHTML, like Gecko) Mobile/14C92 Twitter for iPhone
#http_header(referer): https://t.co/T1323aaaa

# Both prune and tidy are switched off for this site.
prune: no
tidy: no

# Sample URLs for checking this config. The test_contains line presumably
# applies to the test_url directly above it; confirm against the test runner.
test_url: http://www.wsj.com/articles/airasia-flight-8501-tail-recovered-1420878809
test_contains: Saturday evening that the black boxes
test_url: http://www.wsj.com/news/articles/SB10001424052702304626304579509100018004342
test_url: http://www.wsj.com/article/SB10001424052970203363504577185322849515102.html
# slide show
test_url: http://www.wsj.com/article/SB10001424052970204791104577110550376458164.html
test_url: https://www.wsj.com/articles/what-the-world-will-speak-in-2115-1420234648
test_url: https://www.wsj.com/articles/our-amazingly-plastic-brains-1423262095
test_url: https://www.wsj.com/articles/the-biggest-money-mistakes-we-makedecade-by-decade-1477275181
test_url: https://www.wsj.com/articles/russia-figure-skating-world-championships-11646143414
