@@ -94,17 +94,20 @@ def set_custom_rtl(
94
94
If set to `None`, the value will not be changed.
95
95
If set to an integer or string, it will be converted to its ASCII code.
96
96
The default value is -1, which sets no additional range to be converted.
97
- _max: The new maximum value for the range of custom characters that will be written right to left.
97
+ _max: The new maximum value for the range of custom characters that will
98
+ be written right to left.
98
99
If set to `None`, the value will not be changed.
99
100
If set to an integer or string, it will be converted to its ASCII code.
100
101
The default value is -1, which sets no additional range to be converted.
101
- specials: The new list of special characters to be inserted in the current insertion order.
102
+ specials: The new list of special characters to be inserted in the
103
+ current insertion order.
102
104
If set to `None`, the current value will not be changed.
103
105
If set to a string, it will be converted to a list of ASCII codes.
104
106
The default value is an empty list.
105
107
106
108
Returns:
107
- A tuple containing the new values for `CUSTOM_RTL_MIN`, `CUSTOM_RTL_MAX`, and `CUSTOM_RTL_SPECIAL_CHARS`.
109
+ A tuple containing the new values for `CUSTOM_RTL_MIN`,
110
+ `CUSTOM_RTL_MAX`, and `CUSTOM_RTL_SPECIAL_CHARS`.
108
111
"""
109
112
global CUSTOM_RTL_MIN , CUSTOM_RTL_MAX , CUSTOM_RTL_SPECIAL_CHARS
110
113
if isinstance (_min , int ):
@@ -919,7 +922,8 @@ def mergeScaledPage(
919
922
"""
920
923
deprecation_with_replacement (
921
924
"page.mergeScaledPage(page2, scale, expand)" ,
922
- "page2.add_transformation(Transformation().scale(scale)); page.merge_page(page2, expand)" ,
925
+ "page2.add_transformation(Transformation().scale(scale)); "
926
+ "page.merge_page(page2, expand)" ,
923
927
"3.0.0" ,
924
928
)
925
929
op = Transformation ().scale (scale , scale )
@@ -944,7 +948,8 @@ def mergeRotatedPage(
944
948
"""
945
949
deprecation_with_replacement (
946
950
"page.mergeRotatedPage(page2, rotation, expand)" ,
947
- "page2.add_transformation(Transformation().rotate(rotation)); page.merge_page(page2, expand)" ,
951
+ "page2.add_transformation(Transformation().rotate(rotation)); "
952
+ "page.merge_page(page2, expand)" ,
948
953
"3.0.0" ,
949
954
)
950
955
op = Transformation ().rotate (rotation )
@@ -970,7 +975,8 @@ def mergeTranslatedPage(
970
975
"""
971
976
deprecation_with_replacement (
972
977
"page.mergeTranslatedPage(page2, tx, ty, expand)" ,
973
- "page2.add_transformation(Transformation().translate(tx, ty)); page.merge_page(page2, expand)" ,
978
+ "page2.add_transformation(Transformation().translate(tx, ty)); "
979
+ "page.merge_page(page2, expand)" ,
974
980
"3.0.0" ,
975
981
)
976
982
op = Transformation ().translate (tx , ty )
@@ -1002,7 +1008,8 @@ def mergeRotatedTranslatedPage(
1002
1008
"""
1003
1009
deprecation_with_replacement (
1004
1010
"page.mergeRotatedTranslatedPage(page2, rotation, tx, ty, expand)" ,
1005
- "page2.add_transformation(Transformation().rotate(rotation).translate(tx, ty)); page.merge_page(page2, expand)" ,
1011
+ "page2.add_transformation(Transformation().rotate(rotation).translate(tx, ty)); "
1012
+ "page.merge_page(page2, expand)" ,
1006
1013
"3.0.0" ,
1007
1014
)
1008
1015
op = Transformation ().translate (- tx , - ty ).rotate (rotation ).translate (tx , ty )
@@ -1028,7 +1035,8 @@ def mergeRotatedScaledPage(
1028
1035
"""
1029
1036
deprecation_with_replacement (
1030
1037
"page.mergeRotatedScaledPage(page2, rotation, scale, expand)" ,
1031
- "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); page.merge_page(page2, expand)" ,
1038
+ "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); "
1039
+ "page.merge_page(page2, expand)" ,
1032
1040
"3.0.0" ,
1033
1041
)
1034
1042
op = Transformation ().rotate (rotation ).scale (scale , scale )
@@ -1060,7 +1068,8 @@ def mergeScaledTranslatedPage(
1060
1068
"""
1061
1069
deprecation_with_replacement (
1062
1070
"page.mergeScaledTranslatedPage(page2, scale, tx, ty, expand)" ,
1063
- "page2.add_transformation(Transformation().scale(scale).translate(tx, ty)); page.merge_page(page2, expand)" ,
1071
+ "page2.add_transformation(Transformation().scale(scale).translate(tx, ty)); "
1072
+ "page.merge_page(page2, expand)" ,
1064
1073
"3.0.0" ,
1065
1074
)
1066
1075
op = Transformation ().scale (scale , scale ).translate (tx , ty )
@@ -1095,7 +1104,8 @@ def mergeRotatedScaledTranslatedPage(
1095
1104
"""
1096
1105
deprecation_with_replacement (
1097
1106
"page.mergeRotatedScaledTranslatedPage(page2, rotation, tx, ty, expand)" ,
1098
- "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); page.merge_page(page2, expand)" ,
1107
+ "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); "
1108
+ "page.merge_page(page2, expand)" ,
1099
1109
"3.0.0" ,
1100
1110
)
1101
1111
op = Transformation ().rotate (rotation ).scale (scale , scale ).translate (tx , ty )
@@ -1359,10 +1369,13 @@ def _extract_text(
1359
1369
while NameObject (PG .RESOURCES ) not in objr :
1360
1370
# /Resources can be inherited sometimes so we look to parents
1361
1371
objr = objr ["/Parent" ].get_object ()
1362
- # if no parents we will have no /Resources will be available => an exception wil be raised
1372
+ # if there are no parents, no /Resources will be available
1373
+ # => an exception will be raised
1363
1374
resources_dict = cast (DictionaryObject , objr [PG .RESOURCES ])
1364
1375
except Exception :
1365
- return "" # no resources means no text is possible (no font) we consider the file as not damaged, no need to check for TJ or Tj
1376
+ # no resources means no text is possible (no font) we consider the
1377
+ # file as not damaged, no need to check for TJ or Tj
1378
+ return ""
1366
1379
if "/Font" in resources_dict :
1367
1380
for f in cast (DictionaryObject , resources_dict ["/Font" ]):
1368
1381
cmaps [f ] = build_char_map (f , space_width , obj )
@@ -1428,7 +1441,9 @@ def current_spacewidth() -> float:
1428
1441
return _space_width / 1000.0
1429
1442
1430
1443
def process_operation (operator : bytes , operands : List ) -> None :
1431
- nonlocal cm_matrix , cm_stack , tm_matrix , tm_prev , output , text , char_scale , space_scale , _space_width , TL , font_size , cmap , orientations , rtl_dir , visitor_text
1444
+ nonlocal cm_matrix , cm_stack , tm_matrix , tm_prev , output , text
1445
+ nonlocal char_scale , space_scale , _space_width , TL , font_size , cmap
1446
+ nonlocal orientations , rtl_dir , visitor_text
1432
1447
global CUSTOM_RTL_MIN , CUSTOM_RTL_MAX , CUSTOM_RTL_SPECIAL_CHARS
1433
1448
1434
1449
check_crlf_space : bool = False
@@ -1509,10 +1524,12 @@ def process_operation(operator: bytes, operands: List) -> None:
1509
1524
text = ""
1510
1525
# rtl_dir = False
1511
1526
try :
1512
- # charMapTuple: font_type, float(sp_width / 2), encoding, map_dict, font-dictionary
1527
+ # charMapTuple: font_type, float(sp_width / 2), encoding,
1528
+ # map_dict, font-dictionary
1513
1529
charMapTuple = cmaps [operands [0 ]]
1514
1530
_space_width = charMapTuple [1 ]
1515
- # current cmap: encoding, map_dict, font resource name (internal name, not the real font-name),
1531
+ # current cmap: encoding, map_dict, font resource name
1532
+ # (internal name, not the real font-name),
1516
1533
# font-dictionary. The font-dictionary describes the font.
1517
1534
cmap = (
1518
1535
charMapTuple [2 ],
@@ -1575,7 +1592,10 @@ def process_operation(operator: bytes, operands: List) -> None:
1575
1592
t = tt .decode (
1576
1593
cmap [0 ], "surrogatepass"
1577
1594
) # apply str encoding
1578
- except Exception : # the data does not match the expectation, we use the alternative ; text extraction may not be good
1595
+ except Exception :
1596
+ # the data does not match the expectation,
1597
+ # we use the alternative ;
1598
+ # text extraction may not be good
1579
1599
t = tt .decode (
1580
1600
"utf-16-be" if cmap [0 ] == "charmap" else "charmap" ,
1581
1601
"surrogatepass" ,
@@ -1593,7 +1613,9 @@ def process_operation(operator: bytes, operands: List) -> None:
1593
1613
):
1594
1614
xx = ord (x )
1595
1615
# fmt: off
1596
- if ( # cases where the current inserting order is kept (punctuation,...)
1616
+ if (
1617
+ # cases where the current inserting order is
1618
+ # kept (punctuation,...)
1597
1619
(xx <= 0x2F ) # punctuations but...
1598
1620
or (0x3A <= xx and xx <= 0x40 ) # numbers (x30-39)
1599
1621
or (0x2000 <= xx and xx <= 0x206F ) # upper punctuations..
@@ -1809,9 +1831,11 @@ def extract_text(
1809
1831
will change if this function is made more sophisticated.
1810
1832
1811
1833
Arabic, Hebrew, ... are extracted in the correct order.
1812
- If required an custom RTL range of characters can be defined; see function set_custom_rtl
1834
+ If required, a custom RTL range of characters can be defined;
1835
+ see function set_custom_rtl
1813
1836
1814
- Additionally you can provide visitor-methods to get informed on all operands and all text-objects.
1837
+ Additionally you can provide visitor-methods to get informed on all
1838
+ operands and all text-objects.
1815
1839
For example in some PDF files this can be useful to parse tables.
1816
1840
1817
1841
Args:
@@ -1938,9 +1962,9 @@ def _get_fonts(self) -> Tuple[Set[str], Set[str]]:
1938
1962
1939
1963
mediabox = _create_rectangle_accessor (PG .MEDIABOX , ())
1940
1964
"""
1941
- A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
1942
- defining the boundaries of the physical medium on which the page is
1943
- intended to be displayed or printed.
1965
+ A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
1966
+ default user space units, defining the boundaries of the physical medium on
1967
+ which the page is intended to be displayed or printed.
1944
1968
"""
1945
1969
1946
1970
@property
@@ -1965,10 +1989,10 @@ def mediaBox(self, value: RectangleObject) -> None: # deprecated
1965
1989
1966
1990
cropbox = _create_rectangle_accessor ("/CropBox" , (PG .MEDIABOX ,))
1967
1991
"""
1968
- A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
1969
- defining the visible region of default user space. When the page is
1970
- displayed or printed, its contents are to be clipped (cropped) to this
1971
- rectangle and then imposed on the output medium in some
1992
+ A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
1993
+ default user space units, defining the visible region of default user space.
1994
+ When the page is displayed or printed, its contents are to be clipped
1995
+ (cropped) to this rectangle and then imposed on the output medium in some
1972
1996
implementation-defined manner. Default value: same as :attr:`mediabox<mediabox>`.
1973
1997
"""
1974
1998
@@ -1989,9 +2013,9 @@ def cropBox(self, value: RectangleObject) -> None: # deprecated
1989
2013
1990
2014
bleedbox = _create_rectangle_accessor ("/BleedBox" , ("/CropBox" , PG .MEDIABOX ))
1991
2015
"""
1992
- A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
1993
- defining the region to which the contents of the page should be clipped
1994
- when output in a production environment.
2016
+ A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
2017
+ default user space units, defining the region to which the contents of the
2018
+ page should be clipped when output in a production environment.
1995
2019
"""
1996
2020
1997
2021
@property
@@ -2011,8 +2035,9 @@ def bleedBox(self, value: RectangleObject) -> None: # deprecated
2011
2035
2012
2036
trimbox = _create_rectangle_accessor ("/TrimBox" , ("/CropBox" , PG .MEDIABOX ))
2013
2037
"""
2014
- A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
2015
- defining the intended dimensions of the finished page after trimming.
2038
+ A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
2039
+ default user space units, defining the intended dimensions of the finished
2040
+ page after trimming.
2016
2041
"""
2017
2042
2018
2043
@property
@@ -2032,9 +2057,9 @@ def trimBox(self, value: RectangleObject) -> None: # deprecated
2032
2057
2033
2058
artbox = _create_rectangle_accessor ("/ArtBox" , ("/CropBox" , PG .MEDIABOX ))
2034
2059
"""
2035
- A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in default user space units,
2036
- defining the extent of the page's meaningful content as intended by the
2037
- page's creator.
2060
+ A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
2061
+ default user space units, defining the extent of the page's meaningful
2062
+ content as intended by the page's creator.
2038
2063
"""
2039
2064
2040
2065
@property
0 commit comments