Skip to content

Commit 6232c38

Browse files
authored
Fix .push_to_hub and cleanup get_full_repo_name usage (#25120)
* Fix .push_to_hub and cleanup get_full_repo_name usage * Do not rely on Python bool conversion magic * request changes
1 parent 400e76e commit 6232c38

31 files changed

+265
-240
lines changed

examples/flax/image-captioning/run_image_captioning_flax.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
HfArgumentParser,
5454
is_tensorboard_available,
5555
)
56-
from transformers.utils import get_full_repo_name, is_offline_mode, send_example_telemetry
56+
from transformers.utils import is_offline_mode, send_example_telemetry
5757

5858

5959
logger = logging.getLogger(__name__)
@@ -424,14 +424,14 @@ def main():
424424

425425
# Handle the repository creation
426426
if training_args.push_to_hub:
427-
if training_args.hub_model_id is None:
428-
repo_name = get_full_repo_name(
429-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
430-
)
431-
else:
432-
repo_name = training_args.hub_model_id
433-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
434-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
427+
# Retrieve or infer repo_name
428+
repo_name = training_args.hub_model_id
429+
if repo_name is None:
430+
repo_name = Path(training_args.output_dir).absolute().name
431+
# Create repo and retrieve repo_id
432+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
433+
# Clone repo locally
434+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
435435

436436
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
437437
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/

examples/flax/language-modeling/run_bart_dlm_flax.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
set_seed,
6060
)
6161
from transformers.models.bart.modeling_flax_bart import shift_tokens_right
62-
from transformers.utils import get_full_repo_name, send_example_telemetry
62+
from transformers.utils import send_example_telemetry
6363

6464

6565
MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys())
@@ -496,14 +496,14 @@ def main():
496496

497497
# Handle the repository creation
498498
if training_args.push_to_hub:
499-
if training_args.hub_model_id is None:
500-
repo_name = get_full_repo_name(
501-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
502-
)
503-
else:
504-
repo_name = training_args.hub_model_id
505-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
506-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
499+
# Retrieve or infer repo_name
500+
repo_name = training_args.hub_model_id
501+
if repo_name is None:
502+
repo_name = Path(training_args.output_dir).absolute().name
503+
# Create repo and retrieve repo_id
504+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
505+
# Clone repo locally
506+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
507507

508508
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
509509
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/

examples/flax/language-modeling/run_clm_flax.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
set_seed,
5959
)
6060
from transformers.testing_utils import CaptureLogger
61-
from transformers.utils import get_full_repo_name, send_example_telemetry
61+
from transformers.utils import send_example_telemetry
6262

6363

6464
logger = logging.getLogger(__name__)
@@ -372,14 +372,14 @@ def main():
372372

373373
# Handle the repository creation
374374
if training_args.push_to_hub:
375-
if training_args.hub_model_id is None:
376-
repo_name = get_full_repo_name(
377-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
378-
)
379-
else:
380-
repo_name = training_args.hub_model_id
381-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
382-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
375+
# Retrieve or infer repo_name
376+
repo_name = training_args.hub_model_id
377+
if repo_name is None:
378+
repo_name = Path(training_args.output_dir).absolute().name
379+
# Create repo and retrieve repo_id
380+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
381+
# Clone repo locally
382+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
383383

384384
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
385385
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/

examples/flax/language-modeling/run_mlm_flax.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
is_tensorboard_available,
6060
set_seed,
6161
)
62-
from transformers.utils import get_full_repo_name, send_example_telemetry
62+
from transformers.utils import send_example_telemetry
6363

6464

6565
MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys())
@@ -410,14 +410,14 @@ def main():
410410

411411
# Handle the repository creation
412412
if training_args.push_to_hub:
413-
if training_args.hub_model_id is None:
414-
repo_name = get_full_repo_name(
415-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
416-
)
417-
else:
418-
repo_name = training_args.hub_model_id
419-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
420-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
413+
# Retrieve or infer repo_name
414+
repo_name = training_args.hub_model_id
415+
if repo_name is None:
416+
repo_name = Path(training_args.output_dir).absolute().name
417+
# Create repo and retrieve repo_id
418+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
419+
# Clone repo locally
420+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
421421

422422
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
423423
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/

examples/flax/language-modeling/run_t5_mlm_flax.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
set_seed,
6060
)
6161
from transformers.models.t5.modeling_flax_t5 import shift_tokens_right
62-
from transformers.utils import get_full_repo_name, send_example_telemetry
62+
from transformers.utils import send_example_telemetry
6363

6464

6565
MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys())
@@ -537,14 +537,14 @@ def main():
537537

538538
# Handle the repository creation
539539
if training_args.push_to_hub:
540-
if training_args.hub_model_id is None:
541-
repo_name = get_full_repo_name(
542-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
543-
)
544-
else:
545-
repo_name = training_args.hub_model_id
546-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
547-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
540+
# Retrieve or infer repo_name
541+
repo_name = training_args.hub_model_id
542+
if repo_name is None:
543+
repo_name = Path(training_args.output_dir).absolute().name
544+
# Create repo and retrieve repo_id
545+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
546+
# Clone repo locally
547+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
548548

549549
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
550550
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/

examples/flax/question-answering/run_qa.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
PreTrainedTokenizerFast,
5656
is_tensorboard_available,
5757
)
58-
from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
58+
from transformers.utils import check_min_version, send_example_telemetry
5959

6060

6161
logger = logging.getLogger(__name__)
@@ -462,14 +462,14 @@ def main():
462462

463463
# Handle the repository creation
464464
if training_args.push_to_hub:
465-
if training_args.hub_model_id is None:
466-
repo_name = get_full_repo_name(
467-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
468-
)
469-
else:
470-
repo_name = training_args.hub_model_id
471-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
472-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
465+
# Retrieve or infer repo_name
466+
repo_name = training_args.hub_model_id
467+
if repo_name is None:
468+
repo_name = Path(training_args.output_dir).absolute().name
469+
# Create repo and retrieve repo_id
470+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
471+
# Clone repo locally
472+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
473473

474474
# region Load Data
475475
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)

examples/flax/summarization/run_summarization_flax.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
HfArgumentParser,
5757
is_tensorboard_available,
5858
)
59-
from transformers.utils import get_full_repo_name, is_offline_mode, send_example_telemetry
59+
from transformers.utils import is_offline_mode, send_example_telemetry
6060

6161

6262
logger = logging.getLogger(__name__)
@@ -452,14 +452,14 @@ def main():
452452

453453
# Handle the repository creation
454454
if training_args.push_to_hub:
455-
if training_args.hub_model_id is None:
456-
repo_name = get_full_repo_name(
457-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
458-
)
459-
else:
460-
repo_name = training_args.hub_model_id
461-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
462-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
455+
# Retrieve or infer repo_name
456+
repo_name = training_args.hub_model_id
457+
if repo_name is None:
458+
repo_name = Path(training_args.output_dir).absolute().name
459+
# Create repo and retrieve repo_id
460+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
461+
# Clone repo locally
462+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
463463

464464
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
465465
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/

examples/flax/text-classification/run_flax_glue.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
TrainingArguments,
5050
is_tensorboard_available,
5151
)
52-
from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
52+
from transformers.utils import check_min_version, send_example_telemetry
5353

5454

5555
logger = logging.getLogger(__name__)
@@ -342,14 +342,14 @@ def main():
342342

343343
# Handle the repository creation
344344
if training_args.push_to_hub:
345-
if training_args.hub_model_id is None:
346-
repo_name = get_full_repo_name(
347-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
348-
)
349-
else:
350-
repo_name = training_args.hub_model_id
351-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
352-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
345+
# Retrieve or infer repo_name
346+
repo_name = training_args.hub_model_id
347+
if repo_name is None:
348+
repo_name = Path(training_args.output_dir).absolute().name
349+
# Create repo and retrieve repo_id
350+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
351+
# Clone repo locally
352+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
353353

354354
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
355355
# or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).

examples/flax/token-classification/run_flax_ner.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
HfArgumentParser,
5050
is_tensorboard_available,
5151
)
52-
from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
52+
from transformers.utils import check_min_version, send_example_telemetry
5353
from transformers.utils.versions import require_version
5454

5555

@@ -398,14 +398,14 @@ def main():
398398

399399
# Handle the repository creation
400400
if training_args.push_to_hub:
401-
if training_args.hub_model_id is None:
402-
repo_name = get_full_repo_name(
403-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
404-
)
405-
else:
406-
repo_name = training_args.hub_model_id
407-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
408-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
401+
# Retrieve or infer repo_name
402+
repo_name = training_args.hub_model_id
403+
if repo_name is None:
404+
repo_name = Path(training_args.output_dir).absolute().name
405+
# Create repo and retrieve repo_id
406+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
407+
# Clone repo locally
408+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
409409

410410
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
411411
# or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/

examples/flax/vision/run_image_classification.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
is_tensorboard_available,
5555
set_seed,
5656
)
57-
from transformers.utils import get_full_repo_name, send_example_telemetry
57+
from transformers.utils import send_example_telemetry
5858

5959

6060
logger = logging.getLogger(__name__)
@@ -293,14 +293,14 @@ def main():
293293

294294
# Handle the repository creation
295295
if training_args.push_to_hub:
296-
if training_args.hub_model_id is None:
297-
repo_name = get_full_repo_name(
298-
Path(training_args.output_dir).absolute().name, token=training_args.hub_token
299-
)
300-
else:
301-
repo_name = training_args.hub_model_id
302-
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
303-
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
296+
# Retrieve or infer repo_name
297+
repo_name = training_args.hub_model_id
298+
if repo_name is None:
299+
repo_name = Path(training_args.output_dir).absolute().name
300+
# Create repo and retrieve repo_id
301+
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
302+
# Clone repo locally
303+
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
304304

305305
# Initialize datasets and pre-processing transforms
306306
# We use torchvision here for faster pre-processing

examples/pytorch/image-classification/run_image_classification_no_trainer.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242

4343
import transformers
4444
from transformers import AutoConfig, AutoImageProcessor, AutoModelForImageClassification, SchedulerType, get_scheduler
45-
from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
45+
from transformers.utils import check_min_version, send_example_telemetry
4646
from transformers.utils.versions import require_version
4747

4848

@@ -236,12 +236,14 @@ def main():
236236
# Handle the repository creation
237237
if accelerator.is_main_process:
238238
if args.push_to_hub:
239-
if args.hub_model_id is None:
240-
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
241-
else:
242-
repo_name = args.hub_model_id
243-
create_repo(repo_name, exist_ok=True, token=args.hub_token)
244-
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
239+
# Retrieve or infer repo_name
240+
repo_name = args.hub_model_id
241+
if repo_name is None:
242+
repo_name = Path(args.output_dir).absolute().name
243+
# Create repo and retrieve repo_id
244+
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
245+
# Clone repo locally
246+
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
245247

246248
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
247249
if "step_*" not in gitignore:

examples/pytorch/image-pretraining/run_mim_no_trainer.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from accelerate import Accelerator, DistributedType
2626
from accelerate.utils import set_seed
2727
from datasets import load_dataset
28-
from huggingface_hub import Repository
28+
from huggingface_hub import Repository, create_repo
2929
from torch.utils.data import DataLoader
3030
from torchvision.transforms import Compose, Lambda, Normalize, RandomHorizontalFlip, RandomResizedCrop, ToTensor
3131
from tqdm.auto import tqdm
@@ -41,7 +41,7 @@
4141
SchedulerType,
4242
get_scheduler,
4343
)
44-
from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
44+
from transformers.utils import check_min_version, send_example_telemetry
4545
from transformers.utils.versions import require_version
4646

4747

@@ -406,11 +406,14 @@ def main():
406406
# Handle the repository creation
407407
if accelerator.is_main_process:
408408
if args.push_to_hub:
409-
if args.hub_model_id is None:
410-
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
411-
else:
412-
repo_name = args.hub_model_id
413-
repo = Repository(args.output_dir, clone_from=repo_name)
409+
# Retrieve or infer repo_name
410+
repo_name = args.hub_model_id
411+
if repo_name is None:
412+
repo_name = Path(args.output_dir).absolute().name
413+
# Create repo and retrieve repo_id
414+
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
415+
# Clone repo locally
416+
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
414417

415418
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
416419
if "step_*" not in gitignore:

0 commit comments

Comments
 (0)