1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Mimic comicbox.Comicbox functions for PDFs."""
from types import MappingProxyType
from marshmallow.fields import Constant, Nested
from comicbox.fields.collection_fields import (
LegacyNestedMDStringSetField,
StringSetField,
)
from comicbox.fields.fields import StringField
from comicbox.fields.pdf import PdfDateTimeField
from comicbox.fields.xml_fields import (
XmlLegacyNestedMDStringSetField,
XmlPdfDateTimeField,
XmlStringField,
XmlStringSetField,
)
from comicbox.schemas.base import BaseSubSchema
from comicbox.schemas.json_schemas import JsonSchema, JsonSubSchema
from comicbox.schemas.xml_schemas import (
XmlSchema,
XmlSubHeadSchema,
XmlSubSchema,
create_xml_headers,
)
class MuPDFSubSchema(JsonSubSchema):
    """Sub-schema for the PDF document metadata entries exposed via MuPDF."""

    # Attribute names follow the keys of PyMuPDF's ``Document.metadata``:
    # https://pymupdf.readthedocs.io/en/latest/document.html#Document.metadata
    # Declaration order is kept as-is on purpose.

    author = StringSetField(as_string=True)
    creator = StringField()
    # ``keywords`` is the field MuPDFSchema.LEGACY_NESTED_MD_KEYPATH points at.
    keywords = LegacyNestedMDStringSetField(as_string=True)
    # camelCase is intentional here (it is the metadata key), hence the noqa.
    modDate = PdfDateTimeField()  # noqa: N815
    producer = StringField()
    subject = StringSetField(as_string=True)
    title = StringField()
class MuPDFSchema(JsonSchema):
    """JSON schema with a single ``MuPDF`` root key holding a MuPDFSubSchema."""

    ROOT_TAG: str = "MuPDF"
    # The metadata sits directly under the root tag, so both paths coincide.
    ROOT_KEYPATH: str = ROOT_TAG
    # Dotted path from the root to the ``keywords`` field of the sub-schema.
    LEGACY_NESTED_MD_KEYPATH: str = ROOT_KEYPATH + ".keywords"

    # The attribute name is the same string as ROOT_TAG.
    MuPDF = Nested(MuPDFSubSchema)
class PDFSubSchema(BaseSubSchema):
    """Sub-schema for the ``pdf:``-prefixed metadata properties."""

    class Meta(BaseSubSchema.Meta):
        """Schema options.

        The field names contain ``@`` and ``:``, which cannot appear in Python
        attribute names, so the fields are declared through ``include``.
        """

        include = MappingProxyType(
            {
                # Constant attribute binding the ``pdf`` prefix to its URI.
                "@xmlns:pdf": Constant("http://ns.adobe.com/pdf/1.3/"),
                # XML counterparts of the MuPDFSubSchema fields, same order.
                "pdf:Author": XmlStringSetField(as_string=True),
                "pdf:Creator": XmlStringField(),
                # Target of PDFXmlSchema.LEGACY_NESTED_MD_KEYPATH.
                "pdf:Keywords": XmlLegacyNestedMDStringSetField(as_string=True),
                "pdf:ModDate": XmlPdfDateTimeField(),
                "pdf:Producer": XmlStringField(),
                "pdf:Subject": XmlStringSetField(as_string=True),
                "pdf:Title": XmlStringField(),
            }
        )
class PDFRDFDescriptionSchema(XmlSubSchema):
    """Schema for the ``rdf:RDF`` element wrapping the PDF description.

    Declares the ``rdf`` namespace attribute and nests one ``rdf:Description``
    element described by PDFSubSchema.
    """

    class Meta(XmlSubSchema.Meta):
        """Schema options.

        The field names contain ``@`` and ``:``, which cannot appear in Python
        attribute names, so the fields are declared through ``include``.
        """

        include = MappingProxyType(
            {
                # The RDF syntax namespace URI ends with '#'. Namespace-aware
                # XMP/RDF consumers compare namespace URIs exactly, so the
                # trailing '#' must be present for rdf:RDF to be recognised.
                "@xmlns:rdf": Constant("http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
                "rdf:Description": Nested(PDFSubSchema),
            },
        )
class PDFXMPMetaSchema(XmlSubHeadSchema):
    """Schema for the ``x:xmpmeta`` element that wraps ``rdf:RDF``."""

    class Meta(XmlSubHeadSchema.Meta):
        """Schema options: namespace constants plus the included fields."""

        # Prefix and URI of the XMP meta namespace.
        NS = "x"
        NS_URI = "adobe:ns:meta/"
        # NOTE(review): this is the same URI as the ``pdf`` namespace used by
        # PDFSubSchema; confirm it is what create_xml_headers expects here.
        XSD_URI = "http://ns.adobe.com/pdf/1.3/"

        include = MappingProxyType(
            {
                # Header entries generated by the shared helper come first.
                **create_xml_headers(NS, NS_URI, XSD_URI),
                # Fixed toolkit identification string.
                "@x:xmptk": Constant(
                    "Adobe XMP Core 5.6-c140 79.160451, 2017/05/06-01:08:21"
                ),
                "rdf:RDF": Nested(PDFRDFDescriptionSchema),
            }
        )
class PDFXmlSchema(XmlSchema):
    """Top-level XML schema for PDF metadata rooted at ``x:xmpmeta``."""

    ROOT_TAG: str = "x:xmpmeta"
    # Dotted path from the root element down to the metadata properties.
    ROOT_KEYPATH: str = ROOT_TAG + ".rdf:RDF.rdf:Description"
    # Dotted path to the ``pdf:Keywords`` field inside that description.
    LEGACY_NESTED_MD_KEYPATH: str = ROOT_KEYPATH + ".pdf:Keywords"

    class Meta(XmlSchema.Meta):
        """Schema options."""

        # The key is the same string as ROOT_TAG; it is spelled out because
        # the outer class body's names are not visible inside this class body.
        include = MappingProxyType(
            {
                "x:xmpmeta": Nested(PDFXMPMetaSchema),
            }
        )
|