Commit ae0732c
Speed up an integer to the power of a positive integer on CPU (pytorch#26020)
Summary:
Currently, integer scalar exponents are always cast to double. This commit avoids the cast when the
tensor is also integral and the scalar exponent is positive, which speeds up the computation.
Benchmark (Debian Buster, g++ 8, Intel(R) Xeon(R) E-2136 CPU @ 3.30GHz 0 0:0 3300.00 MHz , Debug
build, Turbo turned off):
```python
import timeit
for n, t in [(1000, 13000),
             (10_000, 1300)]:
    for e in (2, 3, 4):
        for dtype in ('torch.int16', 'torch.int32', 'torch.int64'):
            print(f'a.pow({e}) (a.numel() == {n}) for {t} times')
            print(f'dtype {dtype}, {t} times', end='\t\t')
            print(timeit.timeit(f'a.pow({e})',
                                setup=f'import torch; a = torch.arange({n}, device="cpu", dtype={dtype})',
                                number=t))
```
Before:
```
a.pow(2) (a.numel() == 1000) for 13000 times
dtype torch.int16, 13000 times 1.6958350749996498
a.pow(2) (a.numel() == 1000) for 13000 times
dtype torch.int32, 13000 times 0.7989626339999631
a.pow(2) (a.numel() == 1000) for 13000 times
dtype torch.int64, 13000 times 0.7973162800003593
a.pow(3) (a.numel() == 1000) for 13000 times
dtype torch.int16, 13000 times 1.8660746679997828
a.pow(3) (a.numel() == 1000) for 13000 times
dtype torch.int32, 13000 times 0.8101709959996697
a.pow(3) (a.numel() == 1000) for 13000 times
dtype torch.int64, 13000 times 0.8135280149999744
a.pow(4) (a.numel() == 1000) for 13000 times
dtype torch.int16, 13000 times 5.010833072999958
a.pow(4) (a.numel() == 1000) for 13000 times
dtype torch.int32, 13000 times 4.801007671999741
a.pow(4) (a.numel() == 1000) for 13000 times
dtype torch.int64, 13000 times 3.963344578000033
a.pow(2) (a.numel() == 10000) for 1300 times
dtype torch.int16, 1300 times 1.6216251330001796
a.pow(2) (a.numel() == 10000) for 1300 times
dtype torch.int32, 1300 times 0.5672429639998882
a.pow(2) (a.numel() == 10000) for 1300 times
dtype torch.int64, 1300 times 0.5544572270000572
a.pow(3) (a.numel() == 10000) for 1300 times
dtype torch.int16, 1300 times 1.656308512999658
a.pow(3) (a.numel() == 10000) for 1300 times
dtype torch.int32, 1300 times 1.502670819999821
a.pow(3) (a.numel() == 10000) for 1300 times
dtype torch.int64, 1300 times 0.5757876879997639
a.pow(4) (a.numel() == 10000) for 1300 times
dtype torch.int16, 1300 times 4.775718216999849
a.pow(4) (a.numel() == 10000) for 1300 times
dtype torch.int32, 1300 times 4.754745475000163
a.pow(4) (a.numel() == 10000) for 1300 times
dtype torch.int64, 1300 times 3.737249878000057
```
After:
```
a.pow(2) (a.numel() == 1000) for 13000 times
dtype torch.int16, 13000 times 1.1006453190002503
a.pow(2) (a.numel() == 1000) for 13000 times
dtype torch.int32, 13000 times 1.0849009019998448
a.pow(2) (a.numel() == 1000) for 13000 times
dtype torch.int64, 13000 times 1.093259106000005
a.pow(3) (a.numel() == 1000) for 13000 times
dtype torch.int16, 13000 times 1.0859826279997833
a.pow(3) (a.numel() == 1000) for 13000 times
dtype torch.int32, 13000 times 1.1076840900000207
a.pow(3) (a.numel() == 1000) for 13000 times
dtype torch.int64, 13000 times 1.0755480369998622
a.pow(4) (a.numel() == 1000) for 13000 times
dtype torch.int16, 13000 times 1.918211066999902
a.pow(4) (a.numel() == 1000) for 13000 times
dtype torch.int32, 13000 times 1.9183043200000611
a.pow(4) (a.numel() == 1000) for 13000 times
dtype torch.int64, 13000 times 1.930021430999659
a.pow(2) (a.numel() == 10000) for 1300 times
dtype torch.int16, 1300 times 0.7271483560002707
a.pow(2) (a.numel() == 10000) for 1300 times
dtype torch.int32, 1300 times 0.7289002070001516
a.pow(2) (a.numel() == 10000) for 1300 times
dtype torch.int64, 1300 times 0.7267536800000016
a.pow(3) (a.numel() == 10000) for 1300 times
dtype torch.int16, 1300 times 0.7301799359997858
a.pow(3) (a.numel() == 10000) for 1300 times
dtype torch.int32, 1300 times 0.7289195180001116
a.pow(3) (a.numel() == 10000) for 1300 times
dtype torch.int64, 1300 times 0.7270008230002531
a.pow(4) (a.numel() == 10000) for 1300 times
dtype torch.int16, 1300 times 1.5354506029998447
a.pow(4) (a.numel() == 10000) for 1300 times
dtype torch.int32, 1300 times 1.528263066999898
a.pow(4) (a.numel() == 10000) for 1300 times
dtype torch.int64, 1300 times 1.5369428439998956
```
---
Best viewed with whitespace changes turned off
Pull Request resolved: pytorch#26020
Differential Revision: D17485400
Pulled By: VitalyFedyunin
fbshipit-source-id: 3a16b074825a5aab0f7e7af3d8100f9e4b7011a3
1 parent 66d2750 commit ae0732c
2 files changed
+128
-97
lines changed
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
35 | 35 | | |
36 | 36 | | |
37 | 37 | | |
38 | | - | |
39 | | - | |
40 | | - | |
41 | 38 | | |
| 39 | + | |
42 | 40 | | |
43 | 41 | | |
44 | 42 | | |
| |||
98 | 96 | | |
99 | 97 | | |
100 | 98 | | |
101 | | - | |
102 | | - | |
103 | | - | |
104 | | - | |
105 | | - | |
106 | | - | |
107 | | - | |
108 | | - | |
109 | | - | |
110 | | - | |
111 | | - | |
112 | | - | |
113 | | - | |
114 | | - | |
115 | | - | |
116 | | - | |
117 | | - | |
118 | | - | |
119 | | - | |
120 | | - | |
121 | | - | |
122 | | - | |
123 | | - | |
124 | | - | |
125 | | - | |
126 | | - | |
127 | | - | |
128 | | - | |
129 | | - | |
130 | | - | |
131 | | - | |
132 | | - | |
133 | | - | |
134 | | - | |
135 | | - | |
136 | | - | |
137 | | - | |
138 | | - | |
139 | | - | |
140 | | - | |
141 | | - | |
142 | | - | |
143 | | - | |
144 | | - | |
145 | | - | |
146 | | - | |
147 | | - | |
148 | | - | |
149 | | - | |
| 99 | + | |
| 100 | + | |
| 101 | + | |
| 102 | + | |
| 103 | + | |
| 104 | + | |
| 105 | + | |
| 106 | + | |
| 107 | + | |
| 108 | + | |
| 109 | + | |
| 110 | + | |
| 111 | + | |
| 112 | + | |
| 113 | + | |
| 114 | + | |
| 115 | + | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
| 122 | + | |
| 123 | + | |
| 124 | + | |
| 125 | + | |
| 126 | + | |
| 127 | + | |
| 128 | + | |
| 129 | + | |
| 130 | + | |
| 131 | + | |
| 132 | + | |
| 133 | + | |
| 134 | + | |
| 135 | + | |
| 136 | + | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
| 162 | + | |
| 163 | + | |
| 164 | + | |
| 165 | + | |
150 | 166 | | |
151 | 167 | | |
152 | 168 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1347 | 1347 | | |
1348 | 1348 | | |
1349 | 1349 | | |
1350 | | - | |
1351 | | - | |
1352 | | - | |
1353 | | - | |
1354 | | - | |
1355 | | - | |
1356 | | - | |
1357 | | - | |
1358 | | - | |
1359 | | - | |
1360 | | - | |
1361 | | - | |
1362 | | - | |
1363 | | - | |
1364 | | - | |
1365 | | - | |
1366 | | - | |
1367 | | - | |
1368 | | - | |
1369 | | - | |
1370 | | - | |
1371 | | - | |
1372 | | - | |
1373 | | - | |
1374 | | - | |
1375 | | - | |
1376 | | - | |
1377 | | - | |
1378 | | - | |
1379 | | - | |
1380 | | - | |
1381 | | - | |
1382 | | - | |
1383 | | - | |
1384 | | - | |
1385 | | - | |
1386 | | - | |
1387 | | - | |
1388 | | - | |
1389 | | - | |
1390 | | - | |
1391 | | - | |
1392 | | - | |
1393 | | - | |
1394 | | - | |
1395 | 1350 | | |
1396 | 1351 | | |
1397 | 1352 | | |
| |||
7022 | 6977 | | |
7023 | 6978 | | |
7024 | 6979 | | |
| 6980 | + | |
| 6981 | + | |
| 6982 | + | |
| 6983 | + | |
| 6984 | + | |
| 6985 | + | |
| 6986 | + | |
| 6987 | + | |
| 6988 | + | |
| 6989 | + | |
| 6990 | + | |
| 6991 | + | |
| 6992 | + | |
| 6993 | + | |
| 6994 | + | |
| 6995 | + | |
| 6996 | + | |
| 6997 | + | |
| 6998 | + | |
| 6999 | + | |
| 7000 | + | |
| 7001 | + | |
| 7002 | + | |
| 7003 | + | |
| 7004 | + | |
| 7005 | + | |
| 7006 | + | |
| 7007 | + | |
| 7008 | + | |
| 7009 | + | |
| 7010 | + | |
| 7011 | + | |
| 7012 | + | |
| 7013 | + | |
| 7014 | + | |
| 7015 | + | |
| 7016 | + | |
| 7017 | + | |
| 7018 | + | |
| 7019 | + | |
| 7020 | + | |
| 7021 | + | |
| 7022 | + | |
| 7023 | + | |
| 7024 | + | |
| 7025 | + | |
| 7026 | + | |
| 7027 | + | |
| 7028 | + | |
| 7029 | + | |
| 7030 | + | |
| 7031 | + | |
| 7032 | + | |
| 7033 | + | |
| 7034 | + | |
| 7035 | + | |
| 7036 | + | |
| 7037 | + | |
| 7038 | + | |
| 7039 | + | |
7025 | 7040 | | |
7026 | 7041 | | |
7027 | 7042 | | |
| |||
0 commit comments