-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
Copy pathCFURL.c
5494 lines (5080 loc) · 234 KB
/
CFURL.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* CFURL.c
Copyright (c) 1998-2019, Apple Inc. and the Swift project authors
Portions Copyright (c) 2014-2019, Apple Inc. and the Swift project authors
Licensed under Apache License v2.0 with Runtime Library Exception
See http://swift.org/LICENSE.txt for license information
See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
Responsibility: Jim Luther/Chris Linn
*/
#include "CFURL.h"
#include "CFPriv.h"
#include "CFCharacterSetPriv.h"
#include "CFNumber.h"
#include "CFInternal.h"
#include "CFRuntime_Internal.h"
#include <stdatomic.h>
#include "CFStringEncodingConverter.h"
#include <stdatomic.h>
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if TARGET_OS_MAC || TARGET_OS_LINUX || TARGET_OS_BSD || TARGET_OS_WASI
#if TARGET_OS_OSX
#include "CFNumberFormatter.h"
#endif
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#if __has_include(<sys/syslog.h>)
#include <sys/syslog.h>
#elif __has_include(<syslog.h>)
#include <syslog.h>
#endif
#include "CFURLPriv.h"
#endif
#ifndef DEBUG_URL_MEMORY_USAGE
// enables various statistical counters which can be displayed with __CFURLDumpMemRecord().
#define DEBUG_URL_MEMORY_USAGE 0
#endif
#ifndef DEBUG_URL_INITIALIZER_LOGGING
// enables logging in URL initializer. You get to see the inputs and output for each URL created.
#define DEBUG_URL_INITIALIZER_LOGGING 0
#endif
static CFArrayRef WindowsPathToURLComponents(CFStringRef path, CFAllocatorRef alloc, Boolean isDir, Boolean isAbsolute) CF_RETURNS_RETAINED;
static CFStringRef WindowsPathToURLPath(CFStringRef path, CFAllocatorRef alloc, Boolean isDir, Boolean isAbsolute) CF_RETURNS_RETAINED;
static CFStringRef POSIXPathToURLPath(CFStringRef path, CFAllocatorRef alloc, Boolean isDirectory, Boolean isAbsolute, Boolean *posixAndUrlPathsMatch) CF_RETURNS_RETAINED;
static CFStringRef CreateStringFromFileSystemRepresentationByAddingPercentEscapes(CFAllocatorRef alloc, const UInt8 *bytes, CFIndex numBytes, Boolean isDirectory, Boolean isAbsolute, Boolean windowsPath, Boolean *addedPercentEncoding) CF_RETURNS_RETAINED;
CFStringRef CFURLCreateStringWithFileSystemPath(CFAllocatorRef allocator, CFURLRef anURL, CFURLPathStyle fsType, Boolean resolveAgainstBase) CF_RETURNS_RETAINED;
CF_EXPORT CFURLRef _CFURLCreateCurrentDirectoryURL(CFAllocatorRef allocator) CF_RETURNS_RETAINED;
#if TARGET_OS_MAC
static Boolean _CFURLHasFileURLScheme(CFURLRef url, Boolean *hasScheme);
#endif
// When __CONSTANT_CFSTRINGS__ is not defined, we have separate macros for static and exported constant strings, but
// when it is defined, we must prefix with static to prevent the string from being exported
#ifdef __CONSTANT_CFSTRINGS__
static CONST_STRING_DECL(kCFURLHTTPScheme, "http")
static CONST_STRING_DECL(kCFURLHTTPSScheme, "https")
static CONST_STRING_DECL(kCFURLFileScheme, "file")
static CONST_STRING_DECL(kCFURLDataScheme, "data")
static CONST_STRING_DECL(kCFURLFTPScheme, "ftp")
#if TARGET_OS_MAC
static CONST_STRING_DECL(kCFURLLocalhost, "localhost")
#endif
#else
CONST_STRING_DECL(kCFURLHTTPScheme, "http")
CONST_STRING_DECL(kCFURLHTTPSScheme, "https")
CONST_STRING_DECL(kCFURLFileScheme, "file")
CONST_STRING_DECL(kCFURLDataScheme, "data")
CONST_STRING_DECL(kCFURLFTPScheme, "ftp")
#if TARGET_OS_MAC
CONST_STRING_DECL(kCFURLLocalhost, "localhost")
#endif
#endif
#if DEBUG_URL_MEMORY_USAGE
static uint numURLs = 0; // number of URLs allocated
static uint numDealloced = 0; // number of URLs deallocated
static uint numURLsParsed = 0; // number of URLs created from a string which had to be parsed
static uint numExtraDataAllocated = 0; // number of URLs with additional data -- either because URLHandle was used, or because a sanitizedString was needed
static uint numURLsWithBaseURL = 0; // number of URLs with a baseURL
static uint numNonUTF8EncodedURLs = 0; // number of URLs that don't have UTF8 encoding
#endif
#define STACK_BUFFER_SIZE 1024
/* The bit flags in myURL->_flags */
// component bits
#define HAS_SCHEME (0x00000001)
#define HAS_USER (0x00000002)
#define HAS_PASSWORD (0x00000004)
#define HAS_HOST (0x00000008)
#define HAS_PORT (0x00000010)
#define HAS_PATH (0x00000020)
#define HAS_PARAMETERS (0x00000040)
#define HAS_QUERY (0x00000080)
#define HAS_FRAGMENT (0x00000100)
#define MAX_COMPONENTS 9
// various boolean flags
#define IS_IPV6_ENCODED (0x00000400)
#define IS_DIRECTORY (0x00000800)
#define IS_CANONICAL_FILE_URL (0x00001000) // if set, the URL is a file URL in the form "file://<absolute_percent_encoded_path>" (it was created from a file system path or representation)
#define PATH_HAS_FILE_ID (0x00002000)
#define IS_DECOMPOSABLE (0x00004000)
#define POSIX_AND_URL_PATHS_MATCH (0x00008000) // POSIX_AND_URL_PATHS_MATCH will only be true if the URL path and the POSIX path are identical, character for character, except for the presence/absence of a trailing slash on directories
#define ORIGINAL_AND_URL_STRINGS_MATCH (0x00010000)
// scheme bits and amount to shift it to translate to the kXXXXScheme enums
#define SCHEME_TYPE_MASK (0xE0000000)
#define SCHEME_SHIFT 29
// Scheme type codes. A code is stored in the bits of _flags selected by SCHEME_TYPE_MASK,
// shifted left by SCHEME_SHIFT (see _getSchemeTypeFromFlags / _setSchemeTypeInFlags).
enum {
kHasUncommonScheme = 0, // scheme is uncommon or scheme isn't in the canonical form (all lower case)
kHasHttpScheme = 1,
kHasHttpsScheme = 2,
kHasFileScheme = 3,
kHasDataScheme = 4,
kHasFtpScheme = 5,
kMaxScheme // one past the last valid code; used as the exclusive upper bound when a scheme type is validated
};
// accessors for the scheme bits in _flags
CF_INLINE UInt32 _getSchemeTypeFromFlags(UInt32 flags);
CF_INLINE void _setSchemeTypeInFlags(UInt32 *flags, UInt32 schemeType);
// Other useful defines
#define NET_LOCATION_MASK (HAS_HOST | HAS_USER | HAS_PASSWORD | HAS_PORT)
#define RESOURCE_SPECIFIER_MASK (HAS_PARAMETERS | HAS_QUERY | HAS_FRAGMENT)
#define ALL_COMPONENTS_MASK (HAS_SCHEME | HAS_USER | HAS_PASSWORD | HAS_HOST | HAS_PORT | HAS_PATH | HAS_PARAMETERS | HAS_QUERY | HAS_FRAGMENT)
// These flags can be compared for equality since these are all set once when the CFURL is created.
// IS_CANONICAL_FILE_URL cannot be compared since we don't always create the URL string.
// POSIX_AND_URL_PATHS_MATCH cannot be compared because it may not be set
// ORIGINAL_AND_URL_STRINGS_MATCH cannot be compared because it gets set on demand later.
#define EQUAL_FLAGS_MASK (HAS_SCHEME | HAS_USER | HAS_PASSWORD | HAS_HOST | HAS_PORT | HAS_PATH | HAS_PARAMETERS | HAS_QUERY | HAS_FRAGMENT | IS_IPV6_ENCODED | IS_DIRECTORY | PATH_HAS_FILE_ID | IS_DECOMPOSABLE | SCHEME_TYPE_MASK )
// The value of FULL_URL_REPRESENTATION must not be in the CFURLPathStyle enums. Also, its value is exposed via _CFURLCopyPropertyListRepresentation to the Finder so don't change it.
#define FULL_URL_REPRESENTATION (0xF)
/* The bit flags in _CFURLAdditionalData->_additionalDataFlags */
/* If ORIGINAL_AND_URL_STRINGS_MATCH in myURL->_flags is false, these bits determine where they differ. XXXX_DIFFERS must match the HAS_XXXX */
#define SCHEME_DIFFERS HAS_SCHEME // Scheme can actually never differ because if there were escaped characters prior to the colon, we'd interpret the string as a relative path
#define USER_DIFFERS HAS_USER
#define PASSWORD_DIFFERS HAS_PASSWORD
#define HOST_DIFFERS HAS_HOST
#define PORT_DIFFERS HAS_PORT // Port can actually never differ because if there were a non-digit following a colon in the net location, we'd interpret the whole net location as the host
#define PATH_DIFFERS HAS_PATH // unused
#define PARAMETERS_DIFFER HAS_PARAMETERS // unused
#define QUERY_DIFFER HAS_QUERY // unused
#define FRAGMENT_DIFFER HAS_FRAGMENT // unused
#define FILE_ID_PREAMBLE "/.file/id="
#define FILE_ID_PREAMBLE_LENGTH 10
#define FILE_PREFIX "file://"
static const UInt8 fileURLPrefix[] = FILE_PREFIX;
// FILE_PREFIX_WITH_AUTHORITY and fileURLPrefixWithAuthority are only needed because some code incorrectly expects file URLs to have a host of "localhost", so if the application is linked on or before OS X 10.9 or iOS 7.0, we add "localhost" to file path URLs we create.
#define FILE_PREFIX_WITH_AUTHORITY "file://localhost"
static const UInt8 fileURLPrefixWithAuthority[] = FILE_PREFIX_WITH_AUTHORITY;
// Reports whether file URLs built from file system paths should use the
// "file://localhost" prefix instead of plain "file://".
// In this implementation the answer is always false.
static Boolean AddAuthorityToFileURL(void)
{
    return false;
}
// In order to reduce the sizeof ( __CFURL ), move these items into a separate structure which is
// only allocated when necessary. In my tests, it's almost never needed -- very rarely does a CFURL have
// either a sanitized string or a reserved pointer for URLHandle.
// Rarely-needed per-URL state, kept out of struct __CFURL to keep that struct small.
// An instance is created on demand by _CFURLAllocateExtraDataspace() and reached through __CFURL::_extra.
struct _CFURLAdditionalData {
void *_reserved; // Reserved for URLHandle's use. Stored and returned as-is by _setReserved()/_getReserved().
CFStringRef _sanitizedString; // The fully compliant RFC string. This is only non-NULL if ORIGINAL_AND_URL_STRINGS_MATCH is false. _setSanitizedString() stores its own copy and releases the previous one.
UInt32 _additionalDataFlags; // these flags only apply to things we need to keep state for in _CFURLAdditionalData (like the XXXX_DIFFERS flags)
};
// The CFURL object itself.
struct __CFURL {
CFRuntimeBase _cfBase;
UInt32 _flags; // HAS_xxx component bits, the IS_xxx / xxx_MATCH boolean bits, and the scheme type in the SCHEME_TYPE_MASK bits
CFStringEncoding _encoding; // The encoding to use when asked to remove percent escapes
_Atomic(CFStringRef) _string; // Never NULL
CFURLRef _base; // NOTE(review): presumably the base URL this URL is relative to, NULL if none -- confirm against the creation code
struct _CFURLAdditionalData* _extra; // NULL until _CFURLAllocateExtraDataspace() is needed; the _getXXX accessors treat NULL as "no value"
_Atomic(void *)_resourceInfo; // For use by CoreServicesInternal to cache property values. Retained and released by CFURL.
#if DEPLOYMENT_RUNTIME_SWIFT
CFRange _ranges[9]; // constant length (9) array of ranges in Swift
#else
CFRange _ranges[1]; // variable length (1 to 9) array of ranges
#endif
};
CF_INLINE void* _getReserved ( const struct __CFURL* url )
{
if ( url && url->_extra ) {
return ( url->_extra->_reserved );
}
else {
return ( NULL );
}
}
// Returns the sanitized URL string kept in url's additional data (not retained for the caller).
// Yields NULL when url is NULL or no additional data has been allocated.
CF_INLINE CFStringRef _getSanitizedString(const struct __CFURL* url)
{
    if ( url == NULL || url->_extra == NULL ) {
        return NULL;
    }
    return url->_extra->_sanitizedString;
}
// Returns the flags kept in url's additional data.
// Yields 0 when url is NULL or no additional data has been allocated.
CF_INLINE UInt32 _getAdditionalDataFlags(const struct __CFURL* url)
{
    if ( url == NULL || url->_extra == NULL ) {
        return 0;
    }
    return url->_extra->_additionalDataFlags;
}
// Atomically reads url's cached resource-info pointer. Yields NULL when url is NULL.
// The pointer is returned as stored; nothing is retained on behalf of the caller.
CF_INLINE void* _getResourceInfo ( const struct __CFURL* url )
{
    if ( url == NULL ) {
        return NULL;
    }
    // const is cast away only so the field can be handed to atomic_load
    struct __CFURL *mutableURL = (struct __CFURL *)url;
    return atomic_load(&mutableURL->_resourceInfo);
}
// Returns how many of the nine HAS_xxx component bits are set in url->_flags
// (0 when url is NULL).
CF_INLINE uint8_t _countRanges(const struct __CFURL *url) {
    if (url == NULL) {
        return 0;
    }
    uint8_t numberOfRanges = 0;
    // Every HAS_xxx flag is a distinct single bit and ALL_COMPONENTS_MASK is their union,
    // so counting the set bits under the mask counts the components present.
    for (UInt32 remaining = url->_flags & ALL_COMPONENTS_MASK; remaining != 0; remaining &= (remaining - 1)) {
        numberOfRanges++;
    }
    return numberOfRanges;
}
// Allocates and attaches the additional-data structure for url if it does not have one yet.
// Does nothing when url is NULL or already has additional data.
// If the allocator cannot provide memory, url->_extra stays NULL; the _setXXX helpers
// re-check url->_extra after calling this, so they simply skip the store in that case.
static void _CFURLAllocateExtraDataspace( struct __CFURL* url )
{
    if ( url && ! url->_extra ) {
        struct _CFURLAdditionalData* extra = (struct _CFURLAdditionalData*) CFAllocatorAllocate( CFGetAllocator( url ), sizeof( struct _CFURLAdditionalData ), 0 );
        if ( extra ) {
            // url has no additional data at this point, so these accessors return NULL / 0 / NULL:
            // the new structure starts out empty.
            extra->_reserved = _getReserved( url );
            extra->_additionalDataFlags = _getAdditionalDataFlags( url );
            extra->_sanitizedString = _getSanitizedString( url );

            url->_extra = extra;

#if DEBUG_URL_MEMORY_USAGE
            numExtraDataAllocated ++;
#endif
        }
    }
}
// Stores the URLHandle-reserved pointer in url's additional data.
// Additional data is allocated only when a non-NULL pointer has to be stored;
// a NULL pointer is written only if additional data already exists.
CF_INLINE void _setReserved ( struct __CFURL* url, void* reserved )
{
    if ( url == NULL ) {
        return;
    }
    if ( url->_extra == NULL && reserved != NULL ) {
        _CFURLAllocateExtraDataspace( url );
    }
    if ( url->_extra != NULL ) {
        url->_extra->_reserved = reserved;
    }
}
// Replaces the sanitized string kept in url's additional data with an immutable copy of sanitizedString.
// - url == NULL: does nothing.
// - sanitizedString == NULL: clears any existing sanitized string (no additional data is allocated just to store NULL).
// The previously stored string, if any, is released.
CF_INLINE void _setSanitizedString( struct __CFURL* url, CFMutableStringRef sanitizedString )
{
    if ( url )
    {
        // Don't allocate extra space if we're just going to be storing NULL
        if ( !url->_extra && sanitizedString ) {
            _CFURLAllocateExtraDataspace( url );
        }

        if ( url->_extra ) {
            // Make the copy before releasing the old value so the new value is valid even if
            // sanitizedString is the string currently stored. Never hand NULL to CFStringCreateCopy.
            CFStringRef newSanitizedString = sanitizedString ? CFStringCreateCopy(CFGetAllocator(url), sanitizedString) : NULL;
            if ( url->_extra->_sanitizedString ) {
                CFRelease(url->_extra->_sanitizedString);
            }
            url->_extra->_sanitizedString = newSanitizedString;
        }
    }
}
// Stores additionalDataFlags in url's additional data.
// Additional data is allocated only when a non-zero value has to be stored;
// zero is written only if additional data already exists.
CF_INLINE void _setAdditionalDataFlags(struct __CFURL* url, UInt32 additionalDataFlags)
{
    if ( url == NULL ) {
        return;
    }
    const Boolean needsStorage = ( url->_extra == NULL ) && ( additionalDataFlags != 0 );
    if ( needsStorage ) {
        _CFURLAllocateExtraDataspace( url );
    }
    if ( url->_extra != NULL ) {
        url->_extra->_additionalDataFlags = additionalDataFlags;
    }
}
// Installs resourceInfo as url's cached resource info, but only if no resource info has been set yet.
// The swap from NULL is atomic, so when several threads race exactly one pointer is installed.
// The installed object is retained; when the slot was already occupied nothing changes.
// A NULL url or NULL resourceInfo is a no-op (installing NULL into an empty slot would change nothing,
// and CFRetain must not be called with NULL).
CF_INLINE void _setResourceInfo ( struct __CFURL* url, void* resourceInfo )
{
    // Must be atomic
    void *old = NULL;
    if ( url && resourceInfo && atomic_compare_exchange_strong_explicit(&url->_resourceInfo, &old, resourceInfo, memory_order_seq_cst, memory_order_relaxed)) {
        CFRetain( resourceInfo );
    }
}
// Extracts the scheme type code (one of the kHasXXXScheme values) from a _flags word.
CF_INLINE UInt32 _getSchemeTypeFromFlags(UInt32 flags)
{
    const UInt32 schemeBits = flags & SCHEME_TYPE_MASK;
    return schemeBits >> SCHEME_SHIFT;
}
// Writes schemeType (one of the kHasXXXScheme values) into the scheme bits of *flags,
// leaving every other bit untouched.
CF_INLINE void _setSchemeTypeInFlags(UInt32 *flags, UInt32 schemeType)
{
    // schemeType is unsigned, so only the upper bound needs checking (kHasUncommonScheme is 0).
    CFAssert2((schemeType < kMaxScheme), __kCFLogAssertion, "%s(): Received bad schemeType %u", __PRETTY_FUNCTION__, (unsigned int)schemeType);
    // The scheme bits are cleared first, so OR-ing in the shifted value cannot collide with old bits.
    *flags = (*flags & ~SCHEME_TYPE_MASK) | (schemeType << SCHEME_SHIFT);
}
static Boolean _fileSystemRepresentationHasFileIDPrefix(const UInt8 *buffer, CFIndex bufLen);
static Boolean _pathHasFileIDPrefix(CFStringRef path);
static CFStringRef _resolveFileSystemPaths(CFStringRef relativePath, CFStringRef basePath, Boolean baseIsDir, CFURLPathStyle fsType, CFAllocatorRef alloc) CF_RETURNS_RETAINED;
static void _parseComponents(CFAllocatorRef alloc, CFStringRef string, CFURLRef baseURL, UInt32 *theFlags, CFRange *packedRanges, uint8_t *numberOfRanges);
static CFRange _rangeForComponent(UInt32 flags, const CFRange *ranges, UInt32 compFlag);
static CFRange _netLocationRange(UInt32 flags, const CFRange *ranges);
static UInt32 _firstResourceSpecifierFlag(UInt32 flags);
static void computeSanitizedString(CFURLRef url);
static CFStringRef correctedComponent(CFStringRef component, UInt32 compFlag, CFStringEncoding enc) CF_RETURNS_RETAINED;
static CFMutableStringRef resolveAbsoluteURLString(CFAllocatorRef alloc, CFStringRef relString, UInt32 relFlags, const CFRange *relRanges, CFStringRef baseString, UInt32 baseFlags, const CFRange *baseRanges) CF_RETURNS_RETAINED;
static CFStringRef _resolvedPath(UniChar *pathStr, UniChar *end, UniChar pathDelimiter, Boolean stripLeadingDotDots, Boolean stripTrailingDelimiter, CFAllocatorRef alloc) CF_RETURNS_RETAINED;
// Character-class bits. Each entry of sURLValidCharacters is an OR of these for one 7-bit ASCII code.
enum {
VALID = 1, // tested by isURLLegalCharacter()
ALPHA = 2, // tested by isALPHA()
PATHVALID = 4, // tested only by isPathLegalCharacter(), which is currently commented out
SCHEME = 8, // tested by scheme_valid()
HEXDIGIT = 16 // tested by isHexDigit()
};
static const unsigned char sURLValidCharacters[128] = {
/* nul 0 */ 0,
/* soh 1 */ 0,
/* stx 2 */ 0,
/* etx 3 */ 0,
/* eot 4 */ 0,
/* enq 5 */ 0,
/* ack 6 */ 0,
/* bel 7 */ 0,
/* bs 8 */ 0,
/* ht 9 */ 0,
/* nl 10 */ 0,
/* vt 11 */ 0,
/* np 12 */ 0,
/* cr 13 */ 0,
/* so 14 */ 0,
/* si 15 */ 0,
/* dle 16 */ 0,
/* dc1 17 */ 0,
/* dc2 18 */ 0,
/* dc3 19 */ 0,
/* dc4 20 */ 0,
/* nak 21 */ 0,
/* syn 22 */ 0,
/* etb 23 */ 0,
/* can 24 */ 0,
/* em 25 */ 0,
/* sub 26 */ 0,
/* esc 27 */ 0,
/* fs 28 */ 0,
/* gs 29 */ 0,
/* rs 30 */ 0,
/* us 31 */ 0,
/* sp 32 */ 0,
/* '!' 33 */ VALID | PATHVALID ,
/* '"' 34 */ 0,
/* '#' 35 */ 0,
/* '$' 36 */ VALID | PATHVALID ,
/* '%' 37 */ 0,
/* '&' 38 */ VALID | PATHVALID ,
/* ''' 39 */ VALID | PATHVALID ,
/* '(' 40 */ VALID | PATHVALID ,
/* ')' 41 */ VALID | PATHVALID ,
/* '*' 42 */ VALID | PATHVALID ,
/* '+' 43 */ VALID | SCHEME | PATHVALID ,
/* ',' 44 */ VALID | PATHVALID ,
/* '-' 45 */ VALID | SCHEME | PATHVALID ,
/* '.' 46 */ VALID | SCHEME | PATHVALID ,
/* '/' 47 */ VALID | PATHVALID ,
/* '0' 48 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '1' 49 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '2' 50 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '3' 51 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '4' 52 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '5' 53 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '6' 54 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '7' 55 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '8' 56 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* '9' 57 */ VALID | SCHEME | PATHVALID | HEXDIGIT ,
/* ':' 58 */ VALID ,
/* ';' 59 */ VALID ,
/* '<' 60 */ 0,
/* '=' 61 */ VALID | PATHVALID ,
/* '>' 62 */ 0,
/* '?' 63 */ VALID ,
/* '@' 64 */ VALID ,
/* 'A' 65 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'B' 66 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'C' 67 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'D' 68 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'E' 69 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'F' 70 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'G' 71 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'H' 72 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'I' 73 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'J' 74 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'K' 75 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'L' 76 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'M' 77 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'N' 78 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'O' 79 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'P' 80 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'Q' 81 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'R' 82 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'S' 83 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'T' 84 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'U' 85 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'V' 86 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'W' 87 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'X' 88 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'Y' 89 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'Z' 90 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* '[' 91 */ 0,
/* '\' 92 */ 0,
/* ']' 93 */ 0,
/* '^' 94 */ 0,
/* '_' 95 */ VALID | PATHVALID ,
/* '`' 96 */ 0,
/* 'a' 97 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'b' 98 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'c' 99 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'd' 100 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'e' 101 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'f' 102 */ VALID | ALPHA | SCHEME | PATHVALID | HEXDIGIT ,
/* 'g' 103 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'h' 104 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'i' 105 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'j' 106 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'k' 107 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'l' 108 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'm' 109 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'n' 110 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'o' 111 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'p' 112 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'q' 113 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'r' 114 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 's' 115 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 't' 116 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'u' 117 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'v' 118 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'w' 119 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'x' 120 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'y' 121 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* 'z' 122 */ VALID | ALPHA | SCHEME | PATHVALID ,
/* '{' 123 */ 0,
/* '|' 124 */ 0,
/* '}' 125 */ 0,
/* '~' 126 */ VALID | PATHVALID ,
/* del 127 */ 0,
};
// True when ch is a 7-bit ASCII character whose sURLValidCharacters entry has the VALID bit.
CF_INLINE Boolean isURLLegalCharacter(UniChar ch) {
    if (ch > 127) {
        return false;   // outside the table: never legal
    }
    return (sURLValidCharacters[ch] & VALID) != 0;
}
// True when ch is a 7-bit ASCII character whose sURLValidCharacters entry has the SCHEME bit.
CF_INLINE Boolean scheme_valid(UniChar ch) {
    if (ch > 127) {
        return false;   // outside the table: never valid in a scheme
    }
    return (sURLValidCharacters[ch] & SCHEME) != 0;
}
// True when ch is a 7-bit ASCII character whose sURLValidCharacters entry has the ALPHA bit.
CF_INLINE Boolean isALPHA(UniChar ch) {
    if (ch > 127) {
        return false;   // outside the table: never alphabetic
    }
    return (sURLValidCharacters[ch] & ALPHA) != 0;
}
/*
Currently unused, but left in for symmetry/informative purposes
CF_INLINE Boolean isPathLegalCharacter(UniChar ch) {
return (ch <= 127) ? ((sURLValidCharacters[ch] & PATHVALID) != 0) : false;
}
*/
// True when ch is a 7-bit ASCII character whose sURLValidCharacters entry has the HEXDIGIT bit
// (0-9, A-F, a-f).
CF_INLINE Boolean isHexDigit(UniChar ch) {
    if (ch > 127) {
        return false;   // outside the table: never a hex digit
    }
    return (sURLValidCharacters[ch] & HEXDIGIT) != 0;
}
// Decodes the two hex digits of a percent escape (ch1 = high nibble, ch2 = low nibble) into *result.
// Returns true on success. Returns false if ch1 or ch2 is not a hex digit; in that case *result is
// 0 when ch1 was bad, or the high nibble shifted into place when only ch2 was bad.
CF_INLINE Boolean _translateBytes(UniChar ch1, UniChar ch2, uint8_t *result) {
    const UniChar digits[2] = { ch1, ch2 };
    *result = 0;
    for (int position = 0; position < 2; position++) {
        const UniChar digit = digits[position];
        uint8_t nibble;
        if (digit >= '0' && digit <= '9') {
            nibble = (uint8_t)(digit - '0');
        } else if (digit >= 'a' && digit <= 'f') {
            nibble = (uint8_t)(10 + digit - 'a');
        } else if (digit >= 'A' && digit <= 'F') {
            nibble = (uint8_t)(10 + digit - 'A');
        } else {
            return false;
        }
        *result = (uint8_t)(*result + nibble);
        if (position == 0) {
            // make room for the low nibble
            *result = (uint8_t)((*result) << 4);
        }
    }
    return true;
}
// Per-byte class bits stored in sURLValidBytes.
enum {
IS_PCHAR = 0x01, // byte may be copied into a URL path without percent-escaping; tested by is_pchar()
};
static const unsigned char sURLValidBytes[256] = {
/* nul 0 */ 0,
/* soh 1 */ 0,
/* stx 2 */ 0,
/* etx 3 */ 0,
/* eot 4 */ 0,
/* enq 5 */ 0,
/* ack 6 */ 0,
/* bel 7 */ 0,
/* bs 8 */ 0,
/* ht 9 */ 0,
/* nl 10 */ 0,
/* vt 11 */ 0,
/* np 12 */ 0,
/* cr 13 */ 0,
/* so 14 */ 0,
/* si 15 */ 0,
/* dle 16 */ 0,
/* dc1 17 */ 0,
/* dc2 18 */ 0,
/* dc3 19 */ 0,
/* dc4 20 */ 0,
/* nak 21 */ 0,
/* syn 22 */ 0,
/* etb 23 */ 0,
/* can 24 */ 0,
/* em 25 */ 0,
/* sub 26 */ 0,
/* esc 27 */ 0,
/* fs 28 */ 0,
/* gs 29 */ 0,
/* rs 30 */ 0,
/* us 31 */ 0,
/* sp 32 */ 0,
/* '!' 33 */ IS_PCHAR,
/* '"' 34 */ 0,
/* '#' 35 */ 0,
/* '$' 36 */ IS_PCHAR,
/* '%' 37 */ 0,
/* '&' 38 */ IS_PCHAR,
/* ''' 39 */ IS_PCHAR,
/* '(' 40 */ IS_PCHAR,
/* ')' 41 */ IS_PCHAR,
/* '*' 42 */ IS_PCHAR,
/* '+' 43 */ IS_PCHAR,
/* ',' 44 */ IS_PCHAR,
/* '-' 45 */ IS_PCHAR,
/* '.' 46 */ IS_PCHAR,
/* '/' 47 */ IS_PCHAR, // not really a pchar -- it's the segment delimiter
/* '0' 48 */ IS_PCHAR,
/* '1' 49 */ IS_PCHAR,
/* '2' 50 */ IS_PCHAR,
/* '3' 51 */ IS_PCHAR,
/* '4' 52 */ IS_PCHAR,
/* '5' 53 */ IS_PCHAR,
/* '6' 54 */ IS_PCHAR,
/* '7' 55 */ IS_PCHAR,
/* '8' 56 */ IS_PCHAR,
/* '9' 57 */ IS_PCHAR,
/* ':' 58 */ IS_PCHAR,
/* ';' 59 */ 0, // we need to percent-escape ';' in file system paths so it won't be mistaken for the start of the obsolete param rule (rfc2396) that CFURL still supports
/* '<' 60 */ 0,
/* '=' 61 */ IS_PCHAR,
/* '>' 62 */ 0,
/* '?' 63 */ 0,
/* '@' 64 */ IS_PCHAR,
/* 'A' 65 */ IS_PCHAR,
/* 'B' 66 */ IS_PCHAR,
/* 'C' 67 */ IS_PCHAR,
/* 'D' 68 */ IS_PCHAR,
/* 'E' 69 */ IS_PCHAR,
/* 'F' 70 */ IS_PCHAR,
/* 'G' 71 */ IS_PCHAR,
/* 'H' 72 */ IS_PCHAR,
/* 'I' 73 */ IS_PCHAR,
/* 'J' 74 */ IS_PCHAR,
/* 'K' 75 */ IS_PCHAR,
/* 'L' 76 */ IS_PCHAR,
/* 'M' 77 */ IS_PCHAR,
/* 'N' 78 */ IS_PCHAR,
/* 'O' 79 */ IS_PCHAR,
/* 'P' 80 */ IS_PCHAR,
/* 'Q' 81 */ IS_PCHAR,
/* 'R' 82 */ IS_PCHAR,
/* 'S' 83 */ IS_PCHAR,
/* 'T' 84 */ IS_PCHAR,
/* 'U' 85 */ IS_PCHAR,
/* 'V' 86 */ IS_PCHAR,
/* 'W' 87 */ IS_PCHAR,
/* 'X' 88 */ IS_PCHAR,
/* 'Y' 89 */ IS_PCHAR,
/* 'Z' 90 */ IS_PCHAR,
/* '[' 91 */ 0,
/* '\' 92 */ 0,
/* ']' 93 */ 0,
/* '^' 94 */ 0,
/* '_' 95 */ IS_PCHAR,
/* '`' 96 */ 0,
/* 'a' 97 */ IS_PCHAR,
/* 'b' 98 */ IS_PCHAR,
/* 'c' 99 */ IS_PCHAR,
/* 'd' 100 */ IS_PCHAR,
/* 'e' 101 */ IS_PCHAR,
/* 'f' 102 */ IS_PCHAR,
/* 'g' 103 */ IS_PCHAR,
/* 'h' 104 */ IS_PCHAR,
/* 'i' 105 */ IS_PCHAR,
/* 'j' 106 */ IS_PCHAR,
/* 'k' 107 */ IS_PCHAR,
/* 'l' 108 */ IS_PCHAR,
/* 'm' 109 */ IS_PCHAR,
/* 'n' 110 */ IS_PCHAR,
/* 'o' 111 */ IS_PCHAR,
/* 'p' 112 */ IS_PCHAR,
/* 'q' 113 */ IS_PCHAR,
/* 'r' 114 */ IS_PCHAR,
/* 's' 115 */ IS_PCHAR,
/* 't' 116 */ IS_PCHAR,
/* 'u' 117 */ IS_PCHAR,
/* 'v' 118 */ IS_PCHAR,
/* 'w' 119 */ IS_PCHAR,
/* 'x' 120 */ IS_PCHAR,
/* 'y' 121 */ IS_PCHAR,
/* 'z' 122 */ IS_PCHAR,
/* '{' 123 */ 0,
/* '|' 124 */ 0,
/* '}' 125 */ 0,
/* '~' 126 */ IS_PCHAR,
/* del 127 */ 0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
};
// True when the byte's sURLValidBytes entry has the IS_PCHAR bit, i.e. the byte can be copied
// into a URL path as-is instead of being percent-escaped.
CF_INLINE Boolean is_pchar(unsigned char ch) {
    const unsigned char classBits = sURLValidBytes[ch];
    return ( classBits & IS_PCHAR ) != 0;
}
/*
 CreateStringFromFileSystemRepresentationByAddingPercentEscapes creates a CFString
 for the path-absolute form of a URI path component from the native file system representation.
 Note: this code uses '/' path separators

 The rules for path-absolute from rfc3986 are:
 path-absolute = "/" [ segment-nz *( "/" segment ) ]
 segment = *pchar
 segment-nz = 1*pchar
 pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
 pct-encoded = "%" HEXDIG HEXDIG
 unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
 sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="

 (The byte table consulted by is_pchar() deliberately escapes ';' and passes '/' through.)

 Parameters:
   alloc                - allocator handed to CFStringCreateWithBytes for the result.
   bytes                - the file system representation. It need not be nul-terminated.
   numBytes             - number of bytes at `bytes`. Trailing nul bytes are tolerated and ignored;
                          a nul byte followed by a non-nul byte makes the function fail.
   isDirectory          - true: make sure the result ends with a path separator.
                          false: strip trailing separators, always keeping at least one character.
   isAbsolute           - true: the result starts with the file URL prefix.
   windowsPath          - true: '/' is percent-escaped too, so the separator looked for / appended is "%2F".
   addedPercentEncoding - optional; receives whether any byte had to be percent-escaped.

 Returns a new string the caller must release, or NULL if the scratch buffer could not be
 allocated or the input contained an embedded nul byte.
*/
static CFStringRef CreateStringFromFileSystemRepresentationByAddingPercentEscapes(CFAllocatorRef alloc, const UInt8 *bytes, CFIndex numBytes, Boolean isDirectory, Boolean isAbsolute, Boolean windowsPath, Boolean *addedPercentEncoding) CF_RETURNS_RETAINED
{
    static const UInt8 hexchars[] = "0123456789ABCDEF";
    const UInt8 *fileURLPrefixPtr;
    size_t fileURLPrefixLength; // sizeof the prefix array, so it includes the terminating nul
    if ( AddAuthorityToFileURL() ) {
        fileURLPrefixPtr = fileURLPrefixWithAuthority;
        fileURLPrefixLength = sizeof(fileURLPrefixWithAuthority);
    }
    else {
        fileURLPrefixPtr = fileURLPrefix;
        fileURLPrefixLength = sizeof(fileURLPrefix);
    }

    // Space needed besides the escaped path bytes: the prefix (absolute URLs only) and the separator
    // a directory may get appended -- "/" is 1 byte, but the Windows form "%2F" is 3 bytes.
    const size_t prefixReserve = isAbsolute ? fileURLPrefixLength : 0;
    const size_t separatorReserve = isDirectory ? (windowsPath ? 3 : 1) : 0;

    size_t bufferLength = (PATH_MAX * 3) + prefixReserve + separatorReserve;
    STACK_BUFFER_DECL(UInt8, stackBuf, bufferLength); // worst case is every byte needs to be percent-escaped
    UInt8 *bufStartPtr;
    UInt8 *bufBytePtr;
    CFIndex idx;
    CFStringRef result = NULL;
    Boolean addedPercent = FALSE;

    // choose a buffer to percent-escape into.
    if ( numBytes <= PATH_MAX ) {
        bufStartPtr = &stackBuf[0];
    }
    else {
        // worst case is every byte needs to be percent-escaped
        bufferLength = ((size_t)numBytes * 3) + prefixReserve + separatorReserve;
        bufStartPtr = (UInt8 *)malloc(bufferLength);
    }

    if ( bufStartPtr != NULL ) {
        if ( isAbsolute ) {
            // start with the fileURLPrefix; continue writing over its terminating nul
            cf_strlcpy((char *)bufStartPtr, (char *)fileURLPrefixPtr, bufferLength);
            bufBytePtr = bufStartPtr + fileURLPrefixLength - 1;
        }
        else {
            bufBytePtr = bufStartPtr;
        }

        // copy or percent-escape each byte up to the first nul
        for ( idx = 0; (idx < numBytes) && (bytes[idx] != 0); ++idx ) {
            const UInt8 byte = bytes[idx];
            // percent-escape the forward slash if this is a windowsPath
            if ( is_pchar(byte) && !(windowsPath && (byte == '/')) ) {
                *bufBytePtr++ = byte;
            }
            else {
                *bufBytePtr++ = '%';
                *bufBytePtr++ = hexchars[byte >> 4];
                *bufBytePtr++ = hexchars[byte & 0x0f];
                addedPercent = TRUE;
            }
        }

        // number of input bytes that actually made it into the buffer (excludes any nul padding)
        const CFIndex convertedBytes = idx;

        // did we convert numBytes? If not, it's OK if the remaining bytes are all nul
        // (embedded nul bytes are not allowed)
        for ( /* start where we left off */; (idx < numBytes) && (bytes[idx] == '\0'); ++idx ) {
            // do nothing
        }

        if ( idx == numBytes ) {
            if ( isDirectory ) {
                // if it is a directory and it doesn't end with PATH_SEP, append a PATH_SEP.
                if ( windowsPath ) {
                    // NOTE(review): nothing is appended when the buffer holds 3 bytes or fewer -- preserved from the original logic.
                    if ( (bufBytePtr - bufStartPtr > 3) && (strncmp((const char *)(bufBytePtr - 3), "%2F", 3) != 0) ) {
                        *bufBytePtr++ = '%';
                        *bufBytePtr++ = '2';
                        *bufBytePtr++ = 'F';
                    }
                }
                else {
                    // look at the last converted byte (never at bytes[-1] or at nul padding)
                    if ( (convertedBytes == 0) || (bytes[convertedBytes - 1] != '/') ) {
                        *bufBytePtr++ = '/';
                    }
                }
            }
            else {
                // it is not a directory: remove any pathDelim characters at end (leaving at least one character)
                CFIndex remaining = convertedBytes;
                if ( windowsPath ) {
                    while ( (remaining > 1) && (bufBytePtr - bufStartPtr > 3) && (strncmp((const char *)(bufBytePtr - 3), "%2F", 3) == 0) ) {
                        bufBytePtr -= 3;
                        --remaining;
                    }
                }
                else {
                    while ( (remaining > 1) && (bytes[remaining - 1] == '/') ) {
                        --bufBytePtr;
                        --remaining;
                    }
                }
            }

            // create the result
            result = CFStringCreateWithBytes(alloc, bufStartPtr, (CFIndex)(bufBytePtr - bufStartPtr), kCFStringEncodingUTF8, FALSE);
        }
        // else: the remaining bytes were not all nul, result stays NULL

        // free the buffer if we malloc'd it
        if ( bufStartPtr != &stackBuf[0] ) {
            free(bufStartPtr);
        }
    }

    if ( addedPercentEncoding ) {
        *addedPercentEncoding = addedPercent;
    }

    return ( result );
}
// Returns NULL if str cannot be converted for whatever reason (it has no UTF-8 C-string form, the
// scratch buffer cannot be allocated, or the escaping step fails); otherwise returns a newly-created
// string in which every byte that is not allowed in a URL path is percent-escaped. When preserveSlashes
// is false, '/' is percent-escaped as well. Caller must always release the returned string.
// NOTE(review): `alloc` is not used -- the result is created with kCFAllocatorDefault, as before.
CF_INLINE CFStringRef _replacePathIllegalCharacters(CFStringRef str, CFAllocatorRef alloc, Boolean preserveSlashes) CF_RETURNS_RETAINED {
    CFStringRef result = NULL;
    CFIndex strlength = CFStringGetLength(str);
    // room for the UTF-8 form of str (sized for at least one character) plus the terminating nul
    CFIndex bufferSize = CFStringGetMaximumSizeForEncoding(((strlength != 0) ? strlength : 1), kCFStringEncodingUTF8) + 1;
    STACK_BUFFER_DECL(char, stackBuffer, STACK_BUFFER_SIZE);
    char *bufferPtr;

    if ( bufferSize <= STACK_BUFFER_SIZE ) {
        bufferPtr = stackBuffer;
    }
    else {
        bufferPtr = (char *)malloc(bufferSize);
    }

    // bufferPtr is NULL only if malloc failed; never hand a NULL buffer to CFStringGetCString
    if ( (bufferPtr != NULL) && CFStringGetCString(str, bufferPtr, bufferSize, kCFStringEncodingUTF8) ) {
        result = CreateStringFromFileSystemRepresentationByAddingPercentEscapes(kCFAllocatorDefault, (const UInt8 *)bufferPtr, strlen(bufferPtr), FALSE, FALSE, !preserveSlashes, NULL);
    }

    if ( bufferPtr != stackBuffer ) {
        free(bufferPtr);
    }
    return result;
}
// Converts one character to `encoding` and appends the percent-escaped form ("%XX" per byte) to str.
// If isSurrogatePair is true, ch points at the two UniChars of a surrogate pair; otherwise at a single UniChar.
// Returns false, appending nothing, when the character cannot be converted to the encoding.
static Boolean _appendPercentEscapesForCharacter(UniChar *ch, Boolean isSurrogatePair, CFStringEncoding encoding, CFMutableStringRef str) {
    enum {
        kMaxBytesPerUniChar = 8, // 8 bytes is the maximum a single UniChar can require in any current encodings; future encodings could require more
        kMaxPercentEncodedUniChars = kMaxBytesPerUniChar * 3, // percent-encoding expands by 3x
    };
    static const UInt8 hexchars[] = "0123456789ABCDEF";

    uint8_t encodedBytes[kMaxBytesPerUniChar];
    CFIndex encodedLength;
    const CFIndex uniCharCount = isSurrogatePair ? 2 : 1;

    if ( CFStringEncodingUnicodeToBytes(encoding, 0, ch, uniCharCount, NULL, encodedBytes, kMaxBytesPerUniChar, &encodedLength) != kCFStringEncodingConversionSuccess ) {
        // conversion failed
        return ( false );
    }

    // expand every encoded byte into '%' followed by two upper-case hex digits
    UniChar escaped[kMaxPercentEncodedUniChars];
    CFIndex escapedLength = 0;
    for ( CFIndex byteIndex = 0; byteIndex < encodedLength; byteIndex++ ) {
        const uint8_t byte = encodedBytes[byteIndex];
        escaped[escapedLength++] = '%';
        escaped[escapedLength++] = hexchars[byte >> 4];
        escaped[escapedLength++] = hexchars[byte & 0x0f];
    }
    CFStringAppendCharacters(str, escaped, escapedLength);
    return ( true );
}