-
Notifications
You must be signed in to change notification settings - Fork 10.4k
/
Copy pathCompilation.cpp
1280 lines (1146 loc) · 48.8 KB
/
Compilation.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===--- Compilation.cpp - Compilation Task Data Structure ----------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "swift/Driver/Compilation.h"
#include "swift/AST/DiagnosticEngine.h"
#include "swift/AST/DiagnosticsDriver.h"
#include "swift/AST/FineGrainedDependencies.h"
#include "swift/AST/FineGrainedDependencyFormat.h"
#include "swift/Basic/Assertions.h"
#include "swift/Basic/OutputFileMap.h"
#include "swift/Basic/ParseableOutput.h"
#include "swift/Basic/Program.h"
#include "swift/Basic/STLExtras.h"
#include "swift/Basic/Statistic.h"
#include "swift/Basic/TaskQueue.h"
#include "swift/Basic/Version.h"
#include "swift/Basic/type_traits.h"
#include "swift/Driver/Action.h"
#include "swift/Driver/Driver.h"
#include "swift/Driver/Job.h"
#include "swift/Driver/ToolChain.h"
#include "swift/Option/Options.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
#include <fstream>
#include <signal.h>
#if defined(_WIN32)
#include <fcntl.h>
#include <io.h>
#endif
#define DEBUG_TYPE "batch-mode"
// Batch-mode has a sub-mode for testing that randomizes batch partitions,
// by user-provided seed. That is the only thing randomized here.
#include <random>
using namespace swift;
using namespace swift::sys;
using namespace swift::driver;
using namespace swift::parseable_output;
using namespace llvm::opt;
struct LogJob {
const Job *j;
LogJob(const Job *j) : j(j) {}
};
struct LogJobArray {
const ArrayRef<const Job *> js;
LogJobArray(const ArrayRef<const Job *> js) : js(js) {}
};
struct LogJobSet {
const SmallPtrSetImpl<const Job*> &js;
LogJobSet(const SmallPtrSetImpl<const Job*> &js) : js(js) {}
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const LogJob &lj) {
lj.j->printSummary(os);
return os;
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const LogJobArray &ljs) {
os << "[";
interleave(ljs.js,
[&](Job const *j) { os << LogJob(j); },
[&]() { os << ' '; });
os << "]";
return os;
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const LogJobSet &ljs) {
os << "{";
interleave(ljs.js,
[&](Job const *j) { os << LogJob(j); },
[&]() { os << ' '; });
os << "}";
return os;
}
// clang-format off
Compilation::Compilation(DiagnosticEngine &Diags,
const ToolChain &TC,
OutputInfo const &OI,
OutputLevel Level,
std::unique_ptr<InputArgList> InputArgs,
std::unique_ptr<DerivedArgList> TranslatedArgs,
InputFileList InputsWithTypes,
size_t FilelistThreshold,
bool EnableBatchMode,
unsigned BatchSeed,
std::optional<unsigned> BatchCount,
std::optional<unsigned> BatchSizeLimit,
bool SaveTemps,
bool ShowDriverTimeCompilation,
std::unique_ptr<UnifiedStatsReporter> StatsReporter,
bool OnlyOneDependencyFile)
: Diags(Diags), TheToolChain(TC),
TheOutputInfo(OI),
Level(Level),
RawInputArgs(std::move(InputArgs)),
TranslatedArgs(std::move(TranslatedArgs)),
InputFilesWithTypes(std::move(InputsWithTypes)),
EnableBatchMode(EnableBatchMode),
BatchSeed(BatchSeed),
BatchCount(BatchCount),
BatchSizeLimit(BatchSizeLimit),
SaveTemps(SaveTemps),
ShowDriverTimeCompilation(ShowDriverTimeCompilation),
Stats(std::move(StatsReporter)),
FilelistThreshold(FilelistThreshold),
OnlyOneDependencyFile(OnlyOneDependencyFile) { }
// clang-format on
static bool writeFilelistIfNecessary(const Job *job, const ArgList &args,
DiagnosticEngine &diags);
using CommandSetVector = llvm::SetVector<const Job*>;
using BatchPartition = std::vector<std::vector<const Job*>>;
namespace {
static DetailedTaskDescription
constructDetailedTaskDescription(const driver::Job &Cmd) {
std::string Executable = Cmd.getExecutable();
SmallVector<std::string, 16> Arguments;
std::string CommandLine;
SmallVector<CommandInput, 4> Inputs;
SmallVector<OutputPair, 8> Outputs;
for (const auto &A : Cmd.getArguments()) {
Arguments.push_back(A);
}
llvm::raw_string_ostream wrapper(CommandLine);
Cmd.printCommandLine(wrapper, "");
wrapper.flush();
for (const Action *A : Cmd.getSource().getInputs()) {
if (const auto *IA = dyn_cast<InputAction>(A))
Inputs.push_back(CommandInput(IA->getInputArg().getValue()));
}
for (const driver::Job *J : Cmd.getInputs()) {
auto OutFiles = J->getOutput().getPrimaryOutputFilenames();
if (const auto *BJAction = dyn_cast<BackendJobAction>(&Cmd.getSource())) {
Inputs.push_back(CommandInput(OutFiles[BJAction->getInputIndex()]));
} else {
for (llvm::StringRef FileName : OutFiles) {
Inputs.push_back(CommandInput(FileName));
}
}
}
// TODO: set up Outputs appropriately.
file_types::ID PrimaryOutputType = Cmd.getOutput().getPrimaryOutputType();
if (PrimaryOutputType != file_types::TY_Nothing) {
for (llvm::StringRef OutputFileName :
Cmd.getOutput().getPrimaryOutputFilenames()) {
Outputs.push_back(OutputPair(PrimaryOutputType, OutputFileName.str()));
}
}
file_types::forAllTypes([&](file_types::ID Ty) {
for (auto Output : Cmd.getOutput().getAdditionalOutputsForType(Ty)) {
Outputs.push_back(OutputPair(Ty, Output.str()));
}
});
return DetailedTaskDescription{Executable, Arguments, CommandLine, Inputs,
Outputs};
}
} // namespace
namespace swift {
namespace driver {
class PerformJobsState {
/// The containing Compilation object.
Compilation &Comp;
/// All jobs which have been scheduled for execution (whether or not
/// they've finished execution), or which have been determined that they
/// don't need to run.
CommandSet ScheduledCommands;
/// A temporary buffer to hold commands that were scheduled but haven't been
/// added to the Task Queue yet, because we might try batching them together
/// first.
CommandSetVector PendingExecution;
/// Set of synthetic BatchJobs that serve to cluster subsets of jobs waiting
/// in PendingExecution. Also used to identify (then unpack) BatchJobs back
/// to their underlying non-Batch Jobs, when running a callback from
/// TaskQueue.
CommandSet BatchJobs;
/// Persistent counter for allocating quasi-PIDs to Jobs combined into
/// BatchJobs. Quasi-PIDs are _negative_ PID-like unique keys used to
/// masquerade BatchJob constituents as (quasi)processes, when writing
/// parseable output to consumers that don't understand the idea of a batch
/// job. They are negative in order to avoid possibly colliding with real
/// PIDs (which are always positive). We start at -1000 here as a crude but
/// harmless hedge against colliding with an errno value that might slip
/// into the stream of real PIDs (say, due to a TaskQueue bug).
int64_t NextBatchQuasiPID = parseable_output::QUASI_PID_START;
/// All jobs which have finished execution or which have been determined
/// that they don't need to run.
CommandSet FinishedCommands;
/// A map from a Job to the commands it is known to be blocking.
///
/// The blocked jobs should be scheduled as soon as possible.
llvm::SmallDenseMap<const Job *, TinyPtrVector<const Job *>, 16>
BlockingCommands;
/// A map from commands that didn't get to run to whether or not they affect
/// downstream commands.
///
/// Only intended for source files.
llvm::SmallDenseMap<const Job *, bool, 16> UnfinishedCommands;
private:
/// TaskQueue for execution.
std::unique_ptr<TaskQueue> TQ;
/// Cumulative result of PerformJobs(), accumulated from subprocesses.
int ResultCode = EXIT_SUCCESS;
/// True if any Job crashed.
bool AnyAbnormalExit = false;
/// Timers for monitoring execution time of subprocesses.
llvm::TimerGroup DriverTimerGroup {"driver", "Driver Compilation Time"};
llvm::SmallDenseMap<const Job *, std::unique_ptr<llvm::Timer>, 16>
DriverTimers;
const Job *findUnfinishedJob(ArrayRef<const Job *> JL) {
for (const Job *Cmd : JL) {
if (!FinishedCommands.count(Cmd))
return Cmd;
}
return nullptr;
}
/// Schedule the given Job if it has not been scheduled and if all of
/// its inputs are in FinishedCommands.
void scheduleCommandIfNecessaryAndPossible(const Job *Cmd) {
if (ScheduledCommands.count(Cmd)) {
if (Comp.getShowJobLifecycle()) {
llvm::outs() << "Already scheduled: " << LogJob(Cmd) << "\n";
}
return;
}
if (auto Blocking = findUnfinishedJob(Cmd->getInputs())) {
BlockingCommands[Blocking].push_back(Cmd);
if (Comp.getShowJobLifecycle()) {
llvm::outs() << "Blocked by: " << LogJob(Blocking)
<< ", now blocking jobs: "
<< LogJobArray(BlockingCommands[Blocking]) << "\n";
}
return;
}
// Adding to scheduled means we've committed to its completion (not
// distinguished from skipping). We never remove it once inserted.
ScheduledCommands.insert(Cmd);
// Adding to pending means it should be in the next round of additions to
// the task queue (either batched or singularly); we remove Jobs from
// PendingExecution once we hand them over to the TaskQueue.
PendingExecution.insert(Cmd);
}
// Sort for ease of testing
template <typename Jobs>
void scheduleCommandsInSortedOrder(const Jobs &jobs) {
llvm::SmallVector<const Job *, 16> sortedJobs;
Comp.sortJobsToMatchCompilationInputs(jobs, sortedJobs);
for (const Job *Cmd : sortedJobs)
scheduleCommandIfNecessaryAndPossible(Cmd);
}
void addPendingJobToTaskQueue(const Job *Cmd) {
// FIXME: Failing here should not take down the whole process.
bool success =
writeFilelistIfNecessary(Cmd, Comp.getArgs(), Comp.getDiags());
assert(success && "failed to write filelist");
(void)success;
assert(Cmd->getExtraEnvironment().empty() &&
"not implemented for compilations with multiple jobs");
if (Comp.getShowJobLifecycle())
llvm::outs() << "Added to TaskQueue: " << LogJob(Cmd) << "\n";
TQ->addTask(Cmd->getExecutable(), Cmd->getArgumentsForTaskExecution(),
std::nullopt, (void *)Cmd);
}
/// When a task finishes, check other Jobs that may be blocked.
void markFinished(const Job *Cmd, bool Skipped=false) {
if (Comp.getShowJobLifecycle()) {
llvm::outs() << "Job "
<< (Skipped ? "skipped" : "finished")
<< ": " << LogJob(Cmd) << "\n";
}
FinishedCommands.insert(Cmd);
if (auto *Stats = Comp.getStatsReporter()) {
auto &D = Stats->getDriverCounters();
if (Skipped)
++D.NumDriverJobsSkipped;
else
++D.NumDriverJobsRun;
}
auto BlockedIter = BlockingCommands.find(Cmd);
if (BlockedIter != BlockingCommands.end()) {
auto AllBlocked = std::move(BlockedIter->second);
if (Comp.getShowJobLifecycle()) {
llvm::outs() << "Scheduling maybe-unblocked jobs: "
<< LogJobArray(AllBlocked) << "\n";
}
BlockingCommands.erase(BlockedIter);
scheduleCommandsInSortedOrder(AllBlocked);
}
}
bool isBatchJob(const Job *MaybeBatchJob) const {
return BatchJobs.count(MaybeBatchJob) != 0;
}
/// Callback which will be called immediately after a task has started. This
/// callback may be used to provide output indicating that the task began.
void taskBegan(ProcessId Pid, void *Context) {
// TODO: properly handle task began.
const Job *BeganCmd = (const Job *)Context;
if (Comp.getShowDriverTimeCompilation()) {
llvm::SmallString<128> TimerName;
llvm::raw_svector_ostream OS(TimerName);
OS << LogJob(BeganCmd);
DriverTimers.insert({
BeganCmd,
std::unique_ptr<llvm::Timer>(
new llvm::Timer("task", OS.str(), DriverTimerGroup))
});
DriverTimers[BeganCmd]->startTimer();
}
switch (Comp.getOutputLevel()) {
case OutputLevel::Normal:
break;
// For command line or verbose output, print out each command as it
// begins execution.
case OutputLevel::PrintJobs:
BeganCmd->printCommandLineAndEnvironment(llvm::outs());
break;
case OutputLevel::Verbose:
BeganCmd->printCommandLine(llvm::errs());
break;
case OutputLevel::Parseable:
BeganCmd->forEachContainedJobAndPID(Pid, [&](const Job *J, Job::PID P) {
auto TascDesc = constructDetailedTaskDescription(*J);
parseable_output::emitBeganMessage(llvm::errs(),
J->getSource().getClassName(),
TascDesc, P,
TaskProcessInformation(Pid));
});
break;
}
}
/// Check to see if a job produced a zero-length serialized diagnostics
/// file, which is used to indicate batch-constituents that were batched
/// together with a failing constituent but did not, themselves, produce any
/// errors.
bool jobWasBatchedWithFailingJobs(const Job *J) const {
auto DiaPath =
J->getOutput().getAnyOutputForType(file_types::TY_SerializedDiagnostics);
if (DiaPath.empty())
return false;
if (!llvm::sys::fs::is_regular_file(DiaPath))
return false;
uint64_t Size;
auto EC = llvm::sys::fs::file_size(DiaPath, Size);
if (EC)
return false;
return Size == 0;
}
/// If a batch-constituent job happens to be batched together with a job
/// that exits with an error, the batch-constituent may be considered
/// "cancelled".
bool jobIsCancelledBatchConstituent(int ReturnCode,
const Job *ContainerJob,
const Job *ConstituentJob) {
return ReturnCode != 0 &&
isBatchJob(ContainerJob) &&
jobWasBatchedWithFailingJobs(ConstituentJob);
}
/// Unpack a \c BatchJob that has finished into its constituent \c Job
/// members, and call \c taskFinished on each, propagating any \c
/// TaskFinishedResponse other than \c
/// TaskFinishedResponse::ContinueExecution from any of the constituent
/// calls.
TaskFinishedResponse
unpackAndFinishBatch(int ReturnCode, StringRef Output,
StringRef Errors, const BatchJob *B) {
if (Comp.getShowJobLifecycle())
llvm::outs() << "Batch job finished: " << LogJob(B) << "\n";
auto res = TaskFinishedResponse::ContinueExecution;
for (const Job *J : B->getCombinedJobs()) {
if (Comp.getShowJobLifecycle())
llvm::outs() << " ==> Unpacked batch constituent finished: "
<< LogJob(J) << "\n";
auto r = taskFinished(
llvm::sys::ProcessInfo::InvalidPid, ReturnCode, Output, Errors,
TaskProcessInformation(llvm::sys::ProcessInfo::InvalidPid),
(void *)J);
if (r != TaskFinishedResponse::ContinueExecution)
res = r;
}
return res;
}
void
emitParseableOutputForEachFinishedJob(ProcessId Pid, int ReturnCode,
StringRef Output,
const Job *FinishedCmd,
TaskProcessInformation ProcInfo) {
FinishedCmd->forEachContainedJobAndPID(Pid, [&](const Job *J,
Job::PID P) {
if (jobIsCancelledBatchConstituent(ReturnCode, FinishedCmd, J)) {
// Simulate SIGINT-interruption to parseable-output consumer for any
// constituent of a failing batch job that produced no errors of its
// own.
parseable_output::emitSignalledMessage(llvm::errs(),
J->getSource().getClassName(),
"cancelled batch constituent",
"", SIGINT, P, ProcInfo);
} else {
parseable_output::emitFinishedMessage(llvm::errs(),
J->getSource().getClassName(),
Output.str(), ReturnCode,
P, ProcInfo);
}
});
}
/// Callback which will be called immediately after a task has finished
/// execution. Determines if execution should continue, and also schedule
/// any additional Jobs which we now know we need to run.
TaskFinishedResponse taskFinished(ProcessId Pid, int ReturnCode,
StringRef Output, StringRef Errors,
TaskProcessInformation ProcInfo,
void *Context) {
const Job *const FinishedCmd = (const Job *)Context;
if (Pid != llvm::sys::ProcessInfo::InvalidPid) {
if (Comp.getShowDriverTimeCompilation()) {
DriverTimers[FinishedCmd]->stopTimer();
}
processOutputOfFinishedProcess(Pid, ReturnCode, FinishedCmd, Output,
ProcInfo);
}
if (Comp.getStatsReporter() && ProcInfo.getResourceUsage().has_value())
Comp.getStatsReporter()->recordJobMaxRSS(
ProcInfo.getResourceUsage()->Maxrss);
if (isBatchJob(FinishedCmd)) {
return unpackAndFinishBatch(ReturnCode, Output, Errors,
static_cast<const BatchJob *>(FinishedCmd));
}
if (ReturnCode != EXIT_SUCCESS)
return taskFailed(FinishedCmd, ReturnCode);
// When a task finishes, we need to reevaluate the other commands that
// might have been blocked.
markFinished(FinishedCmd);
return TaskFinishedResponse::ContinueExecution;
}
TaskFinishedResponse taskFailed(const Job *FinishedCmd,
const int ReturnCode) {
// The task failed, so return true without performing any further
// dependency analysis.
// Store this task's ReturnCode as our Result if we haven't stored
// anything yet.
if (ResultCode == EXIT_SUCCESS)
ResultCode = ReturnCode;
if (!isa<CompileJobAction>(FinishedCmd->getSource()) ||
ReturnCode != EXIT_FAILURE) {
Comp.getDiags().diagnose(SourceLoc(), diag::error_command_failed,
FinishedCmd->getSource().getClassName(),
ReturnCode);
}
// See how ContinueBuildingAfterErrors gets set up in Driver.cpp for
// more info.
assert((Comp.getContinueBuildingAfterErrors() ||
!Comp.getBatchModeEnabled()) &&
"batch mode diagnostics require ContinueBuildingAfterErrors");
return Comp.getContinueBuildingAfterErrors()
? TaskFinishedResponse::ContinueExecution
: TaskFinishedResponse::StopExecution;
}
#if defined(_WIN32)
struct FileBinaryModeRAII {
FileBinaryModeRAII(FILE *F) : F(F) {
PrevMode = _setmode(_fileno(F), _O_BINARY);
}
~FileBinaryModeRAII() {
_setmode(_fileno(F), PrevMode);
}
FILE *F;
int PrevMode;
};
#else
struct FileBinaryModeRAII {
FileBinaryModeRAII(FILE *) {}
};
#endif
void processOutputOfFinishedProcess(ProcessId Pid, int ReturnCode,
const Job *const FinishedCmd,
StringRef Output,
TaskProcessInformation ProcInfo) {
switch (Comp.getOutputLevel()) {
case OutputLevel::PrintJobs:
// Only print the jobs, not the outputs
break;
case OutputLevel::Normal:
case OutputLevel::Verbose:
// Send the buffered output to stderr, though only if we
// support getting buffered output.
if (TaskQueue::supportsBufferingOutput()) {
// Temporarily change stderr to binary mode to avoid double
// LF -> CR LF conversions on the outputs from child
// processes, which have already this conversion appplied.
// This makes a difference only for Windows.
FileBinaryModeRAII F(stderr);
llvm::errs() << Output;
}
break;
case OutputLevel::Parseable:
emitParseableOutputForEachFinishedJob(Pid, ReturnCode, Output,
FinishedCmd, ProcInfo);
break;
}
}
TaskFinishedResponse taskSignalled(ProcessId Pid, StringRef ErrorMsg,
StringRef Output, StringRef Errors,
void *Context, std::optional<int> Signal,
TaskProcessInformation ProcInfo) {
const Job *SignalledCmd = (const Job *)Context;
if (Comp.getShowDriverTimeCompilation()) {
DriverTimers[SignalledCmd]->stopTimer();
}
if (Comp.getOutputLevel() == OutputLevel::Parseable) {
// Parseable output was requested.
SignalledCmd->forEachContainedJobAndPID(Pid, [&](const Job *J,
Job::PID P) {
parseable_output::emitSignalledMessage(llvm::errs(),
J->getSource().getClassName(),
ErrorMsg, Output, Signal, P,
ProcInfo);
});
} else {
// Otherwise, send the buffered output to stderr, though only if we
// support getting buffered output.
if (TaskQueue::supportsBufferingOutput())
llvm::errs() << Output;
}
if (Comp.getStatsReporter() && ProcInfo.getResourceUsage().has_value())
Comp.getStatsReporter()->recordJobMaxRSS(
ProcInfo.getResourceUsage()->Maxrss);
if (!ErrorMsg.empty())
Comp.getDiags().diagnose(SourceLoc(),
diag::error_unable_to_execute_command,
ErrorMsg);
if (Signal.has_value()) {
Comp.getDiags().diagnose(SourceLoc(), diag::error_command_signalled,
SignalledCmd->getSource().getClassName(),
Signal.value());
} else {
Comp.getDiags()
.diagnose(SourceLoc(),
diag::error_command_signalled_without_signal_number,
SignalledCmd->getSource().getClassName());
}
// Since the task signalled, unconditionally set result to -2.
ResultCode = -2;
AnyAbnormalExit = true;
return TaskFinishedResponse::StopExecution;
}
public:
PerformJobsState(Compilation &Comp, std::unique_ptr<TaskQueue> &&TaskQueue)
: Comp(Comp),
TQ(std::move(TaskQueue)) {}
/// Schedule and run initial, additional, and batch jobs.
void runJobs() {
scheduleJobsBeforeBatching();
formBatchJobsAndAddPendingJobsToTaskQueue();
runTaskQueueToCompletion();
}
private:
void scheduleJobsBeforeBatching() {
scheduleJobsForNonIncrementalCompilation();
}
void scheduleJobsForNonIncrementalCompilation() {
for (const Job *Cmd : Comp.getJobs())
scheduleCommandIfNecessaryAndPossible(Cmd);
}
/// Insert all jobs in \p Cmds (of descriptive name \p Kind) to the \c
/// TaskQueue, and clear \p Cmds.
template <typename Container>
void transferJobsToTaskQueue(Container &Cmds, StringRef Kind) {
for (const Job *Cmd : Cmds) {
if (Comp.getShowJobLifecycle())
llvm::outs() << "Adding " << Kind
<< " job to task queue: "
<< LogJob(Cmd) << "\n";
addPendingJobToTaskQueue(Cmd);
}
Cmds.clear();
}
/// Partition the jobs in \c PendingExecution into those that are \p
/// Batchable and those that are \p NonBatchable, clearing \p
/// PendingExecution.
void getPendingBatchableJobs(CommandSetVector &Batchable,
CommandSetVector &NonBatchable) {
for (const Job *Cmd : PendingExecution) {
if (Comp.getToolChain().jobIsBatchable(Comp, Cmd)) {
if (Comp.getShowJobLifecycle())
llvm::outs() << "Batchable: " << LogJob(Cmd) << "\n";
Batchable.insert(Cmd);
} else {
if (Comp.getShowJobLifecycle())
llvm::outs() << "Not batchable: " << LogJob(Cmd) << "\n";
NonBatchable.insert(Cmd);
}
}
}
/// If \p Batch is nonempty, construct a new \c BatchJob from its
/// contents by calling \p ToolChain::constructBatchJob, then insert the
/// new \c BatchJob into \p Batches.
void
formBatchJobFromPartitionBatch(std::vector<const Job *> &Batches,
std::vector<const Job *> const &Batch) {
if (Batch.empty())
return;
if (Comp.getShowJobLifecycle())
llvm::outs() << "Forming batch job from "
<< Batch.size() << " constituents\n";
auto const &TC = Comp.getToolChain();
auto J = TC.constructBatchJob(Batch, NextBatchQuasiPID, Comp);
if (J)
Batches.push_back(Comp.addJob(std::move(J)));
}
/// Build a vector of partition indices, one per Job: the i'th index says
/// which batch of the partition the i'th Job will be assigned to. If we are
/// shuffling due to -driver-batch-seed, the returned indices will not be
/// arranged in contiguous runs. We shuffle partition-indices here, not
/// elements themselves, to preserve the invariant that each batch is a
/// subsequence of the full set of inputs, not just a subset.
std::vector<size_t>
assignJobsToPartitions(size_t PartitionSize,
size_t NumJobs) {
size_t Remainder = NumJobs % PartitionSize;
size_t TargetSize = NumJobs / PartitionSize;
std::vector<size_t> PartitionIndex;
PartitionIndex.reserve(NumJobs);
for (size_t P = 0; P < PartitionSize; ++P) {
// Spread remainder evenly across partitions by adding 1 to the target
// size of the first Remainder of them.
size_t FillCount = TargetSize + ((P < Remainder) ? 1 : 0);
std::fill_n(std::back_inserter(PartitionIndex), FillCount, P);
}
if (Comp.getBatchSeed() != 0) {
std::minstd_rand gen(Comp.getBatchSeed());
std::shuffle(PartitionIndex.begin(), PartitionIndex.end(), gen);
}
assert(PartitionIndex.size() == NumJobs);
return PartitionIndex;
}
/// Create \c NumberOfParallelCommands batches and assign each job to a
/// batch either filling each partition in order or, if seeded with a
/// nonzero value, pseudo-randomly (but deterministically and nearly-evenly).
void partitionIntoBatches(const llvm::SmallVectorImpl<const Job *> &Batchable,
BatchPartition &Partition) {
if (Comp.getShowJobLifecycle()) {
llvm::outs() << "Found " << Batchable.size() << " batchable jobs\n";
llvm::outs() << "Forming into " << Partition.size() << " batches\n";
}
assert(!Partition.empty());
auto PartitionIndex = assignJobsToPartitions(Partition.size(),
Batchable.size());
assert(PartitionIndex.size() == Batchable.size());
auto const &TC = Comp.getToolChain();
for_each(Batchable, PartitionIndex, [&](const Job *Cmd, size_t Idx) {
assert(Idx < Partition.size());
std::vector<const Job*> &P = Partition[Idx];
if (P.empty() || TC.jobsAreBatchCombinable(Comp, P[0], Cmd)) {
if (Comp.getShowJobLifecycle())
llvm::outs() << "Adding " << LogJob(Cmd)
<< " to batch " << Idx << '\n';
P.push_back(Cmd);
} else {
// Strange but theoretically possible that we have a batchable job
// that's not combinable with others; tack a new batch on for it.
if (Comp.getShowJobLifecycle())
llvm::outs() << "Adding " << LogJob(Cmd)
<< " to new batch " << Partition.size() << '\n';
Partition.push_back(std::vector<const Job*>());
Partition.back().push_back(Cmd);
}
});
}
// Selects the number of partitions based on the user-provided batch
// count and/or the number of parallel tasks we can run, subject to a
// fixed per-batch safety cap, to avoid overcommitting memory.
size_t pickNumberOfPartitions() {
// If the user asked for something, use that.
if (Comp.getBatchCount().has_value())
return Comp.getBatchCount().value();
// This is a long comment to justify a simple calculation.
//
// Because there is a secondary "outer" build system potentially also
// scheduling multiple drivers in parallel on separate build targets
// -- while we, the driver, schedule our own subprocesses -- we might
// be creating up to $NCPU^2 worth of _memory pressure_.
//
// Oversubscribing CPU is typically no problem these days, but
// oversubscribing memory can lead to paging, which on modern systems
// is quite bad.
//
// In practice, $NCPU^2 processes doesn't _quite_ happen: as core
// count rises, it usually exceeds the number of large targets
// without any dependencies between them (which are the only thing we
// have to worry about): you might have (say) 2 large independent
// modules * 2 architectures, but that's only an $NTARGET value of 4,
// which is much less than $NCPU if you're on a 24 or 36-way machine.
//
// So the actual number of concurrent processes is:
//
// NCONCUR := $NCPU * min($NCPU, $NTARGET)
//
// Empirically, a frontend uses about 512kb RAM per non-primary file
// and about 10mb per primary. The number of non-primaries per
// process is a constant in a given module, but the number of
// primaries -- the "batch size" -- is inversely proportional to the
// batch count (default: $NCPU). As a result, the memory pressure
// we can expect is:
//
// $NCONCUR * (($NONPRIMARYMEM * $NFILE) +
// ($PRIMARYMEM * ($NFILE/$NCPU)))
//
// If we tabulate this across some plausible values, we see
// unfortunate memory-pressure results:
//
// $NFILE
// +---------------------
// $NTARGET $NCPU | 100 500 1000
// ----------------+---------------------
// 2 2 | 2gb 11gb 22gb
// 4 4 | 4gb 24gb 48gb
// 4 8 | 5gb 28gb 56gb
// 4 16 | 7gb 36gb 72gb
// 4 36 | 11gb 56gb 112gb
//
// As it happens, the lower parts of the table are dominated by
// number of processes rather than the files-per-batch (the batches
// are already quite small due to the high core count) and the left
// side of the table is dealing with modules too small to worry
// about. But the middle and upper-right quadrant is problematic: 4
// and 8 core machines do not typically have 24-48gb of RAM, it'd be
// nice not to page on them when building a 4-target project with
// 500-file modules.
//
// Turns we can do that if we just cap the batch size statically at,
// say, 25 files per batch, we get a better formula:
//
// $NCONCUR * (($NONPRIMARYMEM * $NFILE) +
// ($PRIMARYMEM * min(25, ($NFILE/$NCPU))))
//
// $NFILE
// +---------------------
// $NTARGET $NCPU | 100 500 1000
// ----------------+---------------------
// 2 2 | 1gb 2gb 3gb
// 4 4 | 4gb 8gb 12gb
// 4 8 | 5gb 16gb 24gb
// 4 16 | 7gb 32gb 48gb
// 4 36 | 11gb 56gb 108gb
//
// This means that the "performance win" of batch mode diminishes
// slightly: the batching factor in the equation drops from
// ($NFILE/$NCPU) to min(25, $NFILE/$NCPU). In practice this seems to
// not cost too much: the additional factor in number of subprocesses
// run is the following:
//
// $NFILE
// +---------------------
// $NTARGET $NCPU | 100 500 1000
// ----------------+---------------------
// 2 2 | 2x 10x 20x
// 4 4 | - 5x 10x
// 4 8 | - 2.5x 5x
// 4 16 | - 1.25x 2.5x
// 4 36 | - - 1.1x
//
// Where - means "no difference" because the batches were already
// smaller than 25.
//
// Even in the worst case here, the 1000-file module on 2-core
// machine is being built with only 40 subprocesses, rather than the
// pre-batch-mode 1000. I.e. it's still running 96% fewer
// subprocesses than before. And significantly: it's doing so while
// not exceeding the RAM of a typical 2-core laptop.
// An explanation of why the partition calculation isn't integer division.
// Using an example, a module of 26 files exceeds the limit of 25 and must
// be compiled in 2 batches. Integer division yields 26/25 = 1 batch, but
// a single batch of 26 exceeds the limit. The calculation must round up,
// which can be calculated using: `(x + y - 1) / y`
auto DivideRoundingUp = [](size_t Num, size_t Div) -> size_t {
return (Num + Div - 1) / Div;
};
size_t DefaultSizeLimit = 25;
size_t NumTasks = TQ->getNumberOfParallelTasks();
size_t NumFiles = PendingExecution.size();
size_t SizeLimit = Comp.getBatchSizeLimit().value_or(DefaultSizeLimit);
return std::max(NumTasks, DivideRoundingUp(NumFiles, SizeLimit));
}
/// Select jobs that are batch-combinable from \c PendingExecution, combine
/// them together into \p BatchJob instances (also inserted into \p
/// BatchJobs), and enqueue all \c PendingExecution jobs (whether batched or
/// not) into the \c TaskQueue for execution.
void formBatchJobsAndAddPendingJobsToTaskQueue() {
// If batch mode is not enabled, just transfer the set of pending jobs to
// the task queue, as-is.
if (!Comp.getBatchModeEnabled()) {
transferJobsToTaskQueue(PendingExecution, "standard");
return;
}
size_t NumPartitions = pickNumberOfPartitions();
CommandSetVector Batchable, NonBatchable;
std::vector<const Job *> Batches;
// Split the batchable from non-batchable pending jobs.
getPendingBatchableJobs(Batchable, NonBatchable);
// Partition the batchable jobs into sets.
BatchPartition Partition(NumPartitions);
partitionIntoBatches(Batchable.takeVector(), Partition);
// Construct a BatchJob from each batch in the partition.
for (auto const &Batch : Partition) {
formBatchJobFromPartitionBatch(Batches, Batch);
}
PendingExecution.clear();
// Save batches so we can locate and decompose them on task-exit.
for (const Job *Cmd : Batches)
BatchJobs.insert(Cmd);
// Enqueue the resulting jobs, batched and non-batched alike.
transferJobsToTaskQueue(Batches, "batch");
transferJobsToTaskQueue(NonBatchable, "non-batch");
}
void runTaskQueueToCompletion() {
do {
using namespace std::placeholders;
// Ask the TaskQueue to execute.
if (TQ->execute(std::bind(&PerformJobsState::taskBegan, this, _1, _2),
std::bind(&PerformJobsState::taskFinished, this, _1, _2,
_3, _4, _5, _6),
std::bind(&PerformJobsState::taskSignalled, this, _1,
_2, _3, _4, _5, _6, _7))) {
if (ResultCode == EXIT_SUCCESS) {
// FIXME: Error from task queue while Result == EXIT_SUCCESS most
// likely means some fork/exec or posix_spawn failed; TaskQueue saw
// "an error" at some stage before even calling us with a process
// exit / signal (or else a poll failed); unfortunately the task
// causing it was dropped on the floor and we have no way to recover
// it here, so we report a very poor, generic error.
Comp.getDiags().diagnose(SourceLoc(),
diag::error_unable_to_execute_command,
"<unknown>");
ResultCode = -2;
AnyAbnormalExit = true;
return;
}
}
// Returning without error from TaskQueue::execute should mean either an
// empty TaskQueue or a failed subprocess.
assert(!(ResultCode == 0 && TQ->hasRemainingTasks()));
// Task-exit callbacks from TaskQueue::execute may have unblocked jobs,
// which means there might be PendingExecution jobs to enqueue here. If
// there are, we need to continue trying to make progress on the
// TaskQueue before we start marking deferred jobs as skipped, below.
if (!PendingExecution.empty() && ResultCode == 0) {
formBatchJobsAndAddPendingJobsToTaskQueue();
continue;
}
// It's possible that by marking some jobs as skipped, we unblocked
// some jobs and thus have entries in PendingExecution again; push
// those through to the TaskQueue.
formBatchJobsAndAddPendingJobsToTaskQueue();
// If we added jobs to the TaskQueue, and we are not in an error state,
// we want to give the TaskQueue another run.
} while (ResultCode == 0 && TQ->hasRemainingTasks());
}
public:
Compilation::Result takeResult() && {
if (ResultCode == 0)
ResultCode = Comp.getDiags().hadAnyError();
const bool hadAbnormalExit = hadAnyAbnormalExit();
const auto resultCode = ResultCode;
return Compilation::Result{hadAbnormalExit, resultCode};
}
bool hadAnyAbnormalExit() {
return AnyAbnormalExit;
}
};
} // namespace driver
} // namespace swift
Compilation::~Compilation() = default;
Job *Compilation::addJob(std::unique_ptr<Job> J) {
Job *result = J.get();
Jobs.emplace_back(std::move(J));
return result;
}
Job *Compilation::addExternalJob(std::unique_ptr<Job> J) {
Job *result = J.get();