Skip to content
This repository was archived by the owner on Jun 15, 2023. It is now read-only.

Commit 3b32002

Browse files
atetubouTakuto Ikuta
authored and
Takuto Ikuta
committed
Make clparser faster
This patch improves performance of clparser. * Reduce the number of calls to GetFullPathName. * Use StringPiece for Split and Join. * Add EqualsCaseInsensitive for StringPiece so that no new string instances are generated. * Add some utility members to the StringPiece class.
1 parent 08a3220 commit 3b32002

6 files changed

+130
-79
lines changed

src/clparser.cc

+10-1
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
#include <assert.h>
1919
#include <string.h>
2020

21+
#include "metrics.h"
22+
2123
#ifdef _WIN32
2224
#include "includes_normalize.h"
25+
#include "string_piece.h"
2326
#else
2427
#include "util.h"
2528
#endif
@@ -72,9 +75,15 @@ bool CLParser::FilterInputFilename(string line) {
7275
// static
7376
bool CLParser::Parse(const string& output, const string& deps_prefix,
7477
string* filtered_output, string* err) {
78+
METRIC_RECORD("CLParser::Parse");
79+
7580
// Loop over all lines in the output to process them.
7681
assert(&output != filtered_output);
7782
size_t start = 0;
83+
#ifdef _WIN32
84+
IncludesNormalize normalizer(".");
85+
#endif
86+
7887
while (start < output.size()) {
7988
size_t end = output.find_first_of("\r\n", start);
8089
if (end == string::npos)
@@ -85,7 +94,7 @@ bool CLParser::Parse(const string& output, const string& deps_prefix,
8594
if (!include.empty()) {
8695
string normalized;
8796
#ifdef _WIN32
88-
if (!IncludesNormalize::Normalize(include, NULL, &normalized, err))
97+
if (!normalizer.Normalize(include, &normalized, err))
8998
return false;
9099
#else
91100
// TODO: should this make the path relative to cwd?

src/clparser_perftest.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ int main(int argc, char* argv[]) {
145145
}
146146
int64_t end = GetTimeMillis();
147147

148-
if (end - start > 100) {
148+
if (end - start > 2000) {
149149
int delta_ms = (int)(end - start);
150150
printf("Parse %d times in %dms avg %.1fus\n",
151151
limit, delta_ms, float(delta_ms * 1000) / limit);

src/includes_normalize-win32.cc

+91-36
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "includes_normalize.h"
1616

1717
#include "string_piece.h"
18+
#include "string_piece_util.h"
1819
#include "util.h"
1920

2021
#include <algorithm>
@@ -25,8 +26,45 @@
2526

2627
namespace {
2728

28-
/// Return true if paths a and b are on the same Windows drive.
29+
bool IsPathSeparator(char c) {
30+
return c == '/' || c == '\\';
31+
}
32+
33+
// Return true if paths a and b are on the same windows drive.
34+
// Return false if this function cannot check
35+
// whether or not on the same windows drive.
36+
bool SameDriveFast(StringPiece a, StringPiece b) {
37+
if (a.size() < 3 || b.size() < 3) {
38+
return false;
39+
}
40+
41+
if (!isalpha(a[0]) || !isalpha(b[0])) {
42+
return false;
43+
}
44+
45+
if (tolower(a[0]) != tolower(b[0])) {
46+
return false;
47+
}
48+
49+
if (a[1] != ':' || b[1] != ':') {
50+
return false;
51+
}
52+
53+
if (!IsPathSeparator(a[2]) ||
54+
!IsPathSeparator(b[2])) {
55+
return false;
56+
}
57+
58+
return true;
59+
}
60+
61+
// Return true if paths a and b are on the same Windows drive.
2962
bool SameDrive(StringPiece a, StringPiece b) {
63+
// Fast check.
64+
if (SameDriveFast(a, b)) {
65+
return true;
66+
}
67+
3068
char a_absolute[_MAX_PATH];
3169
char b_absolute[_MAX_PATH];
3270
GetFullPathName(a.AsString().c_str(), sizeof(a_absolute), a_absolute, NULL);
@@ -38,34 +76,54 @@ bool SameDrive(StringPiece a, StringPiece b) {
3876
return _stricmp(a_drive, b_drive) == 0;
3977
}
4078

41-
} // anonymous namespace
79+
bool IsAbsPath(StringPiece s) {
80+
if (s.size() < 3 ||
81+
!isalpha(s[0]) ||
82+
s[1] != ':' ||
83+
!IsPathSeparator(s[2])) {
84+
return false;
85+
}
86+
87+
// Check whether "." or ".." is contained in path.
88+
for (size_t i = 2; i < s.size(); ++i) {
89+
if (!IsPathSeparator(s[i])) {
90+
continue;
91+
}
4292

43-
string IncludesNormalize::Join(const vector<string>& list, char sep) {
44-
string ret;
45-
for (size_t i = 0; i < list.size(); ++i) {
46-
ret += list[i];
47-
if (i != list.size() - 1)
48-
ret += sep;
93+
// Check ".".
94+
if (i + 1 < s.size() && s[i+1] == '.' &&
95+
(i + 2 >= s.size() || IsPathSeparator(s[i+2]))) {
96+
return false;
97+
}
98+
99+
// Check "..".
100+
if (i + 2 < s.size() && s[i+1] == '.' && s[i+2] == '.' &&
101+
(i + 3 >= s.size() || IsPathSeparator(s[i+3]))) {
102+
return false;
103+
}
49104
}
50-
return ret;
51-
}
52105

53-
vector<string> IncludesNormalize::Split(const string& input, char sep) {
54-
vector<string> elems;
55-
stringstream ss(input);
56-
string item;
57-
while (getline(ss, item, sep))
58-
elems.push_back(item);
59-
return elems;
106+
return true;
60107
}
61108

62-
string IncludesNormalize::ToLower(const string& s) {
63-
string ret;
64-
transform(s.begin(), s.end(), back_inserter(ret), ::tolower);
65-
return ret;
109+
} // anonymous namespace
110+
111+
IncludesNormalize::IncludesNormalize(const string& relative_to) {
112+
relative_to_ = AbsPath(relative_to);
113+
splitted_relative_to_ = SplitStringPiece(relative_to_, '/');
66114
}
67115

68116
string IncludesNormalize::AbsPath(StringPiece s) {
117+
if (IsAbsPath(s)) {
118+
string result = s.AsString();
119+
for (size_t i = 0; i < result.size(); ++i) {
120+
if (result[i] == '\\') {
121+
result[i] = '/';
122+
}
123+
}
124+
return result;
125+
}
126+
69127
char result[_MAX_PATH];
70128
GetFullPathName(s.AsString().c_str(), sizeof(result), result, NULL);
71129
for (char* c = result; *c; ++c)
@@ -74,28 +132,30 @@ string IncludesNormalize::AbsPath(StringPiece s) {
74132
return result;
75133
}
76134

77-
string IncludesNormalize::Relativize(StringPiece path, const string& start) {
78-
vector<string> start_list = Split(AbsPath(start), '/');
79-
vector<string> path_list = Split(AbsPath(path), '/');
135+
string IncludesNormalize::Relativize(StringPiece path, const vector<StringPiece>& start_list) {
136+
string abs_path = AbsPath(path);
137+
vector<StringPiece> path_list = SplitStringPiece(abs_path, '/');
80138
int i;
81139
for (i = 0; i < static_cast<int>(min(start_list.size(), path_list.size()));
82140
++i) {
83-
if (ToLower(start_list[i]) != ToLower(path_list[i]))
141+
if (!EqualsCaseInsensitiveASCII(start_list[i], path_list[i])) {
84142
break;
143+
}
85144
}
86145

87-
vector<string> rel_list;
146+
vector<StringPiece> rel_list;
147+
rel_list.reserve(start_list.size() - i + path_list.size() - i);
88148
for (int j = 0; j < static_cast<int>(start_list.size() - i); ++j)
89149
rel_list.push_back("..");
90150
for (int j = i; j < static_cast<int>(path_list.size()); ++j)
91151
rel_list.push_back(path_list[j]);
92152
if (rel_list.size() == 0)
93153
return ".";
94-
return Join(rel_list, '/');
154+
return JoinStringPiece(rel_list, '/');
95155
}
96156

97-
bool IncludesNormalize::Normalize(const string& input, const char* relative_to,
98-
string* result, string* err) {
157+
bool IncludesNormalize::Normalize(const string& input,
158+
string* result, string* err) const {
99159
char copy[_MAX_PATH + 1];
100160
size_t len = input.size();
101161
if (len > _MAX_PATH) {
@@ -108,15 +168,10 @@ bool IncludesNormalize::Normalize(const string& input, const char* relative_to,
108168
return false;
109169
StringPiece partially_fixed(copy, len);
110170

111-
string curdir;
112-
if (!relative_to) {
113-
curdir = AbsPath(".");
114-
relative_to = curdir.c_str();
115-
}
116-
if (!SameDrive(partially_fixed, relative_to)) {
171+
if (!SameDrive(partially_fixed, relative_to_)) {
117172
*result = partially_fixed.AsString();
118173
return true;
119174
}
120-
*result = Relativize(partially_fixed, relative_to);
175+
*result = Relativize(partially_fixed, splitted_relative_to_);
121176
return true;
122177
}

src/includes_normalize.h

+11-7
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,19 @@ struct StringPiece;
2121
/// Utility functions for normalizing include paths on Windows.
2222
/// TODO: this likely duplicates functionality of CanonicalizePath; refactor.
2323
struct IncludesNormalize {
24+
/// Normalize path relative to |relative_to|.
25+
IncludesNormalize(const string& relative_to);
26+
2427
// Internal utilities made available for testing, maybe useful otherwise.
25-
static string Join(const vector<string>& list, char sep);
26-
static vector<string> Split(const string& input, char sep);
27-
static string ToLower(const string& s);
2828
static string AbsPath(StringPiece s);
29-
static string Relativize(StringPiece path, const string& start);
29+
static string Relativize(StringPiece path,
30+
const vector<StringPiece>& start_list);
3031

3132
/// Normalize by fixing slashes style, fixing redundant .. and . and makes the
32-
/// path relative to |relative_to|.
33-
static bool Normalize(const string& input, const char* relative_to,
34-
string* result, string* err);
33+
/// path relative to |relative_to_|.
34+
bool Normalize(const string& input, string* result, string* err) const;
35+
36+
private:
37+
string relative_to_;
38+
vector<StringPiece> splitted_relative_to_;
3539
};

src/includes_normalize_test.cc

+9-34
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include <direct.h>
2020

21+
#include "string_piece_util.h"
2122
#include "test.h"
2223
#include "util.h"
2324

@@ -26,22 +27,23 @@ namespace {
2627
string GetCurDir() {
2728
char buf[_MAX_PATH];
2829
_getcwd(buf, sizeof(buf));
29-
vector<string> parts = IncludesNormalize::Split(string(buf), '\\');
30-
return parts[parts.size() - 1];
30+
vector<StringPiece> parts = SplitStringPiece(buf, '\\');
31+
return parts[parts.size() - 1].AsString();
3132
}
3233

3334
string NormalizeAndCheckNoError(const string& input) {
3435
string result, err;
35-
EXPECT_TRUE(IncludesNormalize::Normalize(input.c_str(), NULL, &result, &err));
36+
IncludesNormalize normalizer(".");
37+
EXPECT_TRUE(normalizer.Normalize(input, &result, &err));
3638
EXPECT_EQ("", err);
3739
return result;
3840
}
3941

4042
string NormalizeRelativeAndCheckNoError(const string& input,
4143
const string& relative_to) {
4244
string result, err;
43-
EXPECT_TRUE(IncludesNormalize::Normalize(input.c_str(), relative_to.c_str(),
44-
&result, &err));
45+
IncludesNormalize normalizer(relative_to);
46+
EXPECT_TRUE(normalizer.Normalize(input, &result, &err));
4547
EXPECT_EQ("", err);
4648
return result;
4749
}
@@ -76,34 +78,6 @@ TEST(IncludesNormalize, Case) {
7678
EXPECT_EQ("A/B", NormalizeAndCheckNoError("A\\./B"));
7779
}
7880

79-
TEST(IncludesNormalize, Join) {
80-
vector<string> x;
81-
EXPECT_EQ("", IncludesNormalize::Join(x, ':'));
82-
x.push_back("alpha");
83-
EXPECT_EQ("alpha", IncludesNormalize::Join(x, ':'));
84-
x.push_back("beta");
85-
x.push_back("gamma");
86-
EXPECT_EQ("alpha:beta:gamma", IncludesNormalize::Join(x, ':'));
87-
}
88-
89-
TEST(IncludesNormalize, Split) {
90-
EXPECT_EQ("", IncludesNormalize::Join(IncludesNormalize::Split("", '/'),
91-
':'));
92-
EXPECT_EQ("a", IncludesNormalize::Join(IncludesNormalize::Split("a", '/'),
93-
':'));
94-
EXPECT_EQ("a:b:c",
95-
IncludesNormalize::Join(
96-
IncludesNormalize::Split("a/b/c", '/'), ':'));
97-
}
98-
99-
TEST(IncludesNormalize, ToLower) {
100-
EXPECT_EQ("", IncludesNormalize::ToLower(""));
101-
EXPECT_EQ("stuff", IncludesNormalize::ToLower("Stuff"));
102-
EXPECT_EQ("stuff and things", IncludesNormalize::ToLower("Stuff AND thINGS"));
103-
EXPECT_EQ("stuff 3and thin43gs",
104-
IncludesNormalize::ToLower("Stuff 3AND thIN43GS"));
105-
}
106-
10781
TEST(IncludesNormalize, DifferentDrive) {
10882
EXPECT_EQ("stuff.h",
10983
NormalizeRelativeAndCheckNoError("p:\\vs08\\stuff.h", "p:\\vs08"));
@@ -129,8 +103,9 @@ TEST(IncludesNormalize, LongInvalidPath) {
129103
"instead of /Zi, but expect a similar error when you link your program.";
130104
// Too long, won't be canonicalized. Ensure doesn't crash.
131105
string result, err;
106+
IncludesNormalize normalizer(".");
132107
EXPECT_FALSE(
133-
IncludesNormalize::Normalize(kLongInputString, NULL, &result, &err));
108+
normalizer.Normalize(kLongInputString, &result, &err));
134109
EXPECT_EQ("path too long", err);
135110

136111
const char kExactlyMaxPath[] =

src/string_piece.h

+8
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ struct StringPiece {
5656
return str_ + len_;
5757
}
5858

59+
char operator[](size_t pos) const {
60+
return str_[pos];
61+
}
62+
63+
size_t size() const {
64+
return len_;
65+
}
66+
5967
const char* str_;
6068
size_t len_;
6169
};

0 commit comments

Comments
 (0)