huggingface
diff --git a/‎examples/seq2seq/README.md
+1-1 b/‎examples/seq2seq/README.md
+1-1
diff --git a/‎examples/seq2seq/test_bash_script.py
+109 b/‎examples/seq2seq/test_bash_script.py
+109
diff --git a/‎examples/seq2seq/test_data/wmt_en_ro/test.source
+20 b/‎examples/seq2seq/test_data/wmt_en_ro/test.source
+20
@@ -32,7 +32,7 @@ This dataset comes in two formats. The "packed" version merges short training ex
 
 ```bash
 cd examples/seq2seq
-https://s3.amazonaws.com/datasets.huggingface.co/translation/wmt_en_ro_packed_train_200.tgz
+wget https://s3.amazonaws.com/datasets.huggingface.co/translation/wmt_en_ro_packed_train_200.tgz
 tar -xzvf wmt_en_ro_packed_200.tgz
 export ENRO_DIR=wmt_en_ro_packed_train_200
 ```
 
@@ -0,0 +1,109 @@
+import argparse
+import os
+import sys
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+import pytorch_lightning as pl
+import timeout_decorator
+import torch
+
+from transformers import BartForConditionalGeneration
+from transformers.testing_utils import slow
+
+from .finetune import SummarizationModule, main
+from .test_seq2seq_examples import CUDA_AVAILABLE, MBART_TINY
+from .utils import load_json
+
+
+MODEL_NAME = MBART_TINY
+# TODO(SS): MODEL_NAME = "sshleifer/student_mbart_en_ro_1_1"
+
+
+@slow
+@pytest.mark.skipif(not CUDA_AVAILABLE, reason="too slow to run on CPU")
+def test_model_download():
+    """This warms up the cache so that we can time the next test without including download time, which varies between machines."""
+    BartForConditionalGeneration.from_pretrained(MODEL_NAME)
+
+
+@timeout_decorator.timeout(120)
+@slow
+@pytest.mark.skipif(not CUDA_AVAILABLE, reason="too slow to run on CPU")
+def test_train_mbart_cc25_enro_script():
+    data_dir = "examples/seq2seq/test_data/wmt_en_ro"
+    env_vars_to_replace = {
+        "$MAX_LEN": 200,
+        "$BS": 4,
+        "$GAS": 1,
+        "$ENRO_DIR": data_dir,
+        "facebook/mbart-large-cc25": MODEL_NAME,
+        # 1 encoder and 1 decoder layer from finetuned mbart en-ro. Should be able to start >0 and improve quickly.
+        # Download is 600MB in previous test.
+        "val_check_interval=0.25": "val_check_interval=1.0",
+    }
+
+    # Clean up bash script
+    bash_script = Path("examples/seq2seq/train_mbart_cc25_enro.sh").open().read().split("finetune.py")[1].strip()
+    bash_script = bash_script.replace("\\\n", "").strip().replace("$@", "")
+    for k, v in env_vars_to_replace.items():
+        bash_script = bash_script.replace(k, str(v))
+    output_dir = tempfile.mkdtemp(prefix="output")
+
+    if CUDA_AVAILABLE:
+        gpus = 1  # torch.cuda.device_count()
+    else:
+        bash_script = bash_script.replace("--fp16", "")
+        gpus = 0
+
+    testargs = (
+        ["finetune.py"]
+        + bash_script.split()
+        + [
+            f"--output_dir={output_dir}",
+            f"--gpus={gpus}",
+            "--learning_rate=3e-1",
+            "--warmup_steps=0",
+            "--val_check_interval=1.0",
+            "--tokenizer_name=facebook/mbart-large-en-ro",
+        ]
+    )
+    with patch.object(sys, "argv", testargs):
+        parser = argparse.ArgumentParser()
+        parser = pl.Trainer.add_argparse_args(parser)
+        parser = SummarizationModule.add_model_specific_args(parser, os.getcwd())
+        args = parser.parse_args()
+        args.do_predict = False
+        # assert args.gpus == gpus THIS BREAKS for multigpu
+        model = main(args)
+
+    # Check metrics
+    metrics = load_json(model.metrics_save_path)
+    first_step_stats = metrics["val"][0]
+    last_step_stats = metrics["val"][-1]
+    assert len(metrics["val"]) == (args.max_epochs / args.val_check_interval)  # +1 accounts for val_sanity_check
+
+    assert last_step_stats["val_avg_gen_time"] >= 0.01
+
+    assert first_step_stats["val_avg_bleu"] < last_step_stats["val_avg_bleu"]  # model learned nothing
+    assert 1.0 >= last_step_stats["val_avg_gen_time"]  # model hanging on generate. Maybe bad config was saved.
+    assert isinstance(last_step_stats[f"val_avg_{model.val_metric}"], float)
+
+    # check lightning ckpt can be loaded and has a reasonable statedict
+    contents = os.listdir(output_dir)
+    ckpt_path = [x for x in contents if x.endswith(".ckpt")][0]
+    full_path = os.path.join(args.output_dir, ckpt_path)
+    ckpt = torch.load(full_path, map_location="cpu")
+    expected_key = "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"
+    assert expected_key in ckpt["state_dict"]
+    assert ckpt["state_dict"]["model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32
+
+    # TODO(SS): turn on args.do_predict when PL bug fixed.
+    if args.do_predict:
+        contents = {os.path.basename(p) for p in contents}
+        assert "test_generations.txt" in contents
+        assert "test_results.txt" in contents
+        # assert len(metrics["val"]) ==  desired_n_evals
+        assert len(metrics["test"]) == 1
@@ -0,0 +1,20 @@
+UN Chief Says There Is No Military Solution in Syria Secretary-General Ban Ki-moon says his response to Russia's stepped up military support for Syria is that "there is no military solution" to the nearly five-year conflict and more weapons will only worsen the violence and misery for millions of people. The U.N. chief again urged all parties, including the divided U.N. Security Council, to unite and support inclusive negotiations to find a political solution. Ban told a news conference Wednesday that he plans to meet with foreign ministers of the five permanent council nations - the U.S., Russia, China, Britain and France - on the sidelines of the General Assembly's ministerial session later this month to discuss Syria.
+He expressed regret that divisions in the council and among the Syrian people and regional powers "made this situation unsolvable." Ban urged the five permanent members to show the solidarity and unity they did in achieving an Iran nuclear deal in addressing the Syria crisis. 8 Poll Numbers That Show Donald Trump Is For Real Some have tried to label him a flip-flopper. Others have dismissed him as a joke. And some are holding out for an implosion. But no matter how some Republicans are trying to drag Donald Trump down from atop the polls, it hasn't worked (yet).
+Ten of the last 11 national polls have shown Donald Trump's lead at double digits, and some are starting to ask seriously what it means for the real estate mogul's nomination chances. Of course, it's still early in the election cycle. None of this is to say that Trump is likely to win the Republican nomination. Pundits point out that at this time in 2011, Rick Perry's lead was giving way to a rising Herman Cain, neither of whom won even one state in the nomination process. And there are many reasons he would struggle in a general election. But outside groups like Jeb Bush's Super PAC and the economic conservative group Club for Growth are recognizing Trump's staying power and beginning to unload their dollars to topple him.
+Here are some recent poll numbers that suggest that the real estate mogul isn't just a passing phase: Trump's favorability ratings have turned 180 degrees. Right before Donald Trump announced his candidacy in mid-June, a Monmouth University poll showed only two in 10 Republicans had a positive view of the real estate mogul. By mid-July, it was 40 percent. In early August, it was 52 percent. Now, six in 10 Republicans have a favorable view of Donald Trump. Roughly three in 10 say they have a negative view. And these numbers hold up in early states. A Quinnipiac poll in Iowa last week found that 60 percent of Republicans there had a favorable view of Trump.
+Two-thirds of GOP voters would be happy with Trump as the nominee. In a CNN/ORC poll last week, 67 percent of Republicans said they would be either "enthusiastic" or "satisfied" if Trump were the nominee. Only two in 10 say they would be "upset" if he were the nominee. Only Ben Carson generates roughly the same level of enthusiasm as Trump (43 percent say they would be "enthusiastic" vs. 40 percent who say the same of Trump). The next closest in enthusiasm? Marco Rubio with only 21 percent.
+On the flip side, 47 percent of Republican voters say they would be "dissatisfied" or "upset" if establishment favorite Jeb Bush becomes the nominee. A majority of Republicans don't see Trump's temperament as a problem. While Donald Trump has been widely criticized for his bombast and insults, 52 percent of leaned Republican voters nationwide think that the real estate mogul has the right temperament to be president, according to Monday's ABC News/Washington Post poll. The same number holds in the first-in-the-nation caucus state of Iowa, where the same 52 percent of Republicans think he has the personality to be commander in chief, according to Quinnipiac last week.
+Still, 44 percent think he doesn't have the personality to serve effectively, and almost six in 10 independents say his temperament does not belong in the White House, according to ABC/Post. Republican voters are getting used to the idea. When they put on their pundit hats, Republican voters think Trump is for real. When asked who is most likely to win the GOP nomination, four in 10 said Trump was the best bet, according to a CNN/ORC poll out last week. That's a change from when four in 10 placed their money on Jeb Bush in late July. Full disclosure: GOP voters haven't had the clearest crystal ball in the past.
+At this time last cycle, four in 10 Republicans picked Rick Perry to win the nomination, vs. only 28 percent for eventual nominee Mitt Romney. Still, it shows that a plurality of GOP voters see Trump's campaign as plausible. Even if Republicans rallied around another candidate, Trump still beats almost everyone. Some pundits point out that the splintered field is likely contributing to Trump's lead, while anti-Trump support is be spread diffusely among more than a dozen other candidates. But a Monmouth University poll in early September shows that, in a hypothetical head-to-head matchup between Trump and most other Republican candidates, Trump almost always garners majority support.
+He leads Carly Fiorina by 13 points, Marco Rubio by 14 points, Walker by 15 points, Jeb Bush by 19 points, and, finally, Rand Paul, John Kasich and Chris Christie by 33 points each. He's in a dead heat with Ted Cruz. The only candidate who beats him? Ben Carson would lead the businessman by a wide 19 points in a hypothetical head-to-head. A bare majority of Donald Trump's supporters say they've made up their minds. A new CBS/NYT poll out on Tuesday shows that just more than half of voters who support Trump say they have locked in their votes. Obviously, a lot can happen to change that, and no one can really say they would never change their mind.
+46 percent said they are leaving the door open to switching candidates. Still, Trump's strongest competition at the moment is from fellow outsider neurosurgeon Ben Carson, but voters who say they have made up their minds are twice as likely to go for Trump. Six in 10 Republicans say they agree with Trump on immigration. Even since Donald Trump called immigrants from Mexico "rapists" in his campaign announcement speech two months ago, immigration has been front and center in the 2016 conversation. Some are worried that Trump's bombast will drive crucial Hispanic voters away from the Republican Party and damage rebranding efforts.
+But according to Monday's new ABC/Post poll, six in 10 Republicans say they agree with Trump on immigration issues. So as long as immigration remains in the spotlight, it seems Donald Trump will remain too. Frustration with government is climbing to new highs. Donald Trump and Ben Carson now account for roughly half of the support from Republican voters, largely due to their outsider status. Six in 10 Republicans in Monday's new ABC/Post poll say they want a political outsider over someone with government experience. And they are angry at Washington, too.
+A Des Moines Register/Bloomberg poll in Iowa from two weeks ago shows that three in four Iowa Republicans are frustrated with Republicans in Congress, with 54 percent "unsatisfied" and 21 percent "mad as hell." Jeremy Corbyn to make debut at Prime Minister's Questions Since his election, Mr Corbyn's debut at PMQs has been keenly awaited New Labour leader Jeremy Corbyn is to make his debut at Prime Minister's Questions later, taking on David Cameron for the first time.
+Mr Corbyn will rise to ask the first of his six allotted questions shortly after midday, with his performance likely to be closely scrutinised by the media and Labour MPs. He has called for "less theatre and more facts" at the weekly showpiece. He has also said he could skip some sessions, leaving them to colleagues. The encounter will be the first parliamentary test of Mr Corbyn's leadership, coming after his appointment of a shadow cabinet and his speech to the TUC annual congress on Tuesday.
+Meanwhile, the Labour leader's decision to stand in silence during the singing of the national anthem at a service on Tuesday to mark the 75th anniversary of the Battle of Britain has attracted criticism from a number of Tory MPs and is the focus of several front page stories in the newspapers. Mr Corbyn's decision not to sing the national anthem has attracted attention A spokesman for Mr Corbyn said he had "stood in respectful silence" and did recognise the "heroism of the Royal Air Force in the Battle of Britain."
+But a member of Mr Corbyn's shadow cabinet, Owen Smith, told BBC Two's Newsnight programme he would have advised the Labour leader to sing the national anthem "irrespective" of his belief that the monarchy should be abolished. Nearly a dozen shadow ministers have refused to serve in Mr Corbyn's top team, citing differences over the economy, defence and foreign affairs, while less than a sixth of the parliamentary party originally backed him as leader. BBC political correspondent Robin Brant says policy differences are also "stacking up" within Labour following Mr Corbyn's appointment over its position on the European Union and the government's cap on benefits.
+Mr Corbyn told the TUC conference Labour was putting forward amendments to remove the whole idea of a cap altogether. Hours later Mr Smith, the shadow work and pensions secretary, said the party was "very clear" that it was only opposing government plans to reduce the level of cap from £26,000 to £23,000. Mr Corbyn will be the fifth Labour leader that David Cameron has faced across the despatch box over the past decade since he became Tory leader. The Labour leader, who has promised a different approach to politics, says he has "crowd sourced" ideas for questions to ask Mr Cameron and has been given more than 30,000 suggestions.
+The Islington North MP has said PMQs is too confrontational and that he will refrain from both "repartee" and trading barbs, instead vowing to focus on serious issues such as poverty, inequality and the challenges facing young people. Mr Corbyn has said that Angela Eagle, the shadow business secretary, will deputise for him at PMQs when he does not attend - for instance when Mr Cameron is travelling abroad. He has also floated the idea of allowing other colleagues to take the floor on occasion, saying he had approached the Commons Speaker John Bercow to discuss the issue.
+When he became leader in 2005, Mr Cameron said he wanted to move away from the "Punch and Judy" style of politics often associated with PMQs but admitted some years later that he had failed. Since it was first televised in 1990, PMQs has been seen as a key barometer of a leader's judgement, their command of the Commons and their standing among their fellow MPs although critics have argued it has become a caricature and is in need of far-reaching reforms. 'Shot in Joburg': Homeless youth trained as photographers Downtown Johannesburg is a tough place to be homeless.
+But one group of former street children have found a way to learn a skill and make a living. "I was shot in Joburg" is a non-profit studio that teaches homeless youngsters how to take photographs of their neighbourhood and make a profit from it. BBC News went to meet one of the project's first graduates. JD Sports boss says higher wages could hurt expansion JD Sports Executive Chairman Peter Cowgill says a higher minimum wage for UK workers could mean "more spending power in the pockets of potential consumers." But that spending power is unlikely to outweigh the higher labour costs at his firm, he says.
+The costs could hit JD Sports' expansion plans, he added, which could mean fewer extra jobs. Thanasi Kokkinakis backed by Tennis Australia president Steve Healy Thanasi Kokkinakis deserves kudos rather than criticism for his behaviour. Thanasi Kokkinakis has been the collateral damage in the recent storm around his friend Nick Kyrgios and deserves kudos rather than criticism for his own behaviour, according to Tennis Australia president Steve Healy.