Universal Product Code (UPC).
The `biip.upc` module contains Biip's support for parsing UPC formats.
This class can interpret the following UPC formats:
- UPC-A, 12 digits.
- UPC-E, 6 digits, with implicit number system 0 and no check digit.
- UPC-E, 7 digits, with explicit number system and no check digit.
- UPC-E, 8 digits, with explicit number system and a check digit.
If you only want to parse UPCs, you can import the UPC parser directly
instead of using `biip.parse()`.
>>> from biip.upc import Upc
If parsing succeeds, it returns a Upc
object.
>>> upc_a = Upc.parse("042100005264")
>>> pprint(upc_a)
Upc(
value='042100005264',
format=UpcFormat.UPC_A,
number_system_digit=0,
payload='04210000526',
check_digit=4
)
A subset of the UPC-A values can be converted to the shorter UPC-E format by
suppressing zeros using `as_upc_e()`.
>>> upc_a.as_upc_e()
'04252614'
All UPC-E values can be expanded to a UPC-A value using `as_upc_a()`.
>>> upc_e = Upc.parse("04252614")
>>> pprint(upc_e)
Upc(
value='04252614',
format=UpcFormat.UPC_E,
number_system_digit=0,
payload='0425261',
check_digit=4
)
>>> upc_e.as_upc_a()
'042100005264'
UPC is a subset of the later GTIN standard: a UPC-A value is also a valid
GTIN-12 value.
>>> upc_e.as_gtin_12()
'042100005264'
The canonical format for persisting UPCs to e.g. a database is GTIN-14.
>>> upc_e.as_gtin_14()
'00042100005264'
Upc
dataclass
Data class containing an UPC.
Source code in src/biip/upc.py
@dataclass
class Upc:
    """Data class containing a UPC.

    Instances are normally created with `Upc.parse()`, which validates the
    length, the digits, and the check digit of the value before returning.
    """

    value: str
    """Raw unprocessed value."""

    format: UpcFormat
    """UPC format, either UPC-A or UPC-E."""

    number_system_digit: int
    """Number system digit."""

    payload: str
    """The actual payload.

    Including number system digit, manufacturer code, and product code. Excludes
    the check digit.
    """

    check_digit: int | None = None
    """Check digit used to check if the UPC as a whole is valid.

    Set by `parse()` for both UPC-A and UPC-E. For UPC-E values given without
    a check digit (6 or 7 digits), it is calculated from the expanded UPC-A
    payload.
    """

    @classmethod
    def parse(
        cls,
        value: str,
        *,
        config: ParseConfig | None = None,  # noqa: ARG003
    ) -> Upc:
        """Parse the given value into a [`Upc`][biip.upc.Upc] object.

        Surrounding whitespace is stripped before parsing. Values of 12 digits
        are parsed as UPC-A, and values of 6, 7, or 8 digits as UPC-E.

        The checksum is guaranteed to be valid if an UPC object is returned.

        Args:
            value: The value to parse.
            config: Configuration options for parsing. Accepted for interface
                compatibility; not used by this parser.

        Returns:
            UPC data structure with the successfully extracted data.

        Raises:
            ParseError: If the parsing fails.
        """
        value = value.strip()
        length = len(value)
        if length not in (6, 7, 8, 12):
            msg = (
                f"Failed to parse {value!r} as UPC: "
                f"Expected 6, 7, 8, or 12 digits, got {length}."
            )
            raise ParseError(msg)

        # `str.isdecimal()` alone also accepts non-ASCII decimal digits (e.g.
        # Arabic-Indic digits). Those are not valid UPC digits and would leak
        # into the formatted output, so require ASCII as well.
        if not (value.isascii() and value.isdecimal()):
            msg = f"Failed to parse {value!r} as UPC: Expected a numerical value."
            raise ParseError(msg)

        if length == 12:
            return cls._parse_upc_a(value)

        if length in (6, 7, 8):
            return cls._parse_upc_e(value)

        msg = "Unhandled UPC length. This is a bug."  # pragma: no cover
        raise NotImplementedError(msg)  # pragma: no cover

    @classmethod
    def _parse_upc_a(cls, value: str) -> Upc:
        """Parse a 12-digit UPC-A value and verify its check digit.

        Args:
            value: A string of exactly 12 digits.

        Returns:
            A `Upc` with format `UpcFormat.UPC_A`.

        Raises:
            ParseError: If the last digit does not match the check digit
                calculated from the first 11 digits.
        """
        assert len(value) == 12  # Internal invariant, ensured by `parse()`.

        payload = value[:-1]
        number_system_digit = int(value[0])
        check_digit = int(value[-1])

        calculated_check_digit = gs1_standard_check_digit(payload)
        if check_digit != calculated_check_digit:
            msg = (
                f"Invalid UPC-A check digit for {value!r}: "
                f"Expected {calculated_check_digit!r}, got {check_digit!r}."
            )
            raise ParseError(msg)

        return cls(
            value=value,
            format=UpcFormat.UPC_A,
            payload=payload,
            number_system_digit=number_system_digit,
            check_digit=check_digit,
        )

    @classmethod
    def _parse_upc_e(cls, value: str) -> Upc:
        """Parse a 6, 7, or 8-digit UPC-E value.

        Args:
            value: A string of 6, 7, or 8 digits.

        Returns:
            A `Upc` with format `UpcFormat.UPC_E`. The payload always holds
            7 digits, starting with the number system digit.

        Raises:
            ParseError: If the number system digit is not 0 or 1, or if an
                explicit check digit does not match the calculated one.
        """
        length = len(value)
        assert length in (6, 7, 8)  # Internal invariant, ensured by `parse()`.

        # A 6-digit value has an implicit number system digit of 0. Longer
        # values carry the number system digit explicitly as the first digit,
        # and an 8-digit value additionally ends with a check digit.
        payload = f"0{value}" if length == 6 else value[:7]
        number_system_digit = int(payload[0])

        if length == 8:
            check_digit = int(value[-1])
        else:
            # No check digit given: derive it from the expanded UPC-A payload.
            # The "0" is only a placeholder in the check digit position, which
            # is dropped again after the expansion.
            upc_a_payload = _upc_e_to_upc_a_expansion(f"{payload}0")[:-1]
            check_digit = gs1_standard_check_digit(upc_a_payload)

        # Control that the number system digit is correct.
        if number_system_digit not in (0, 1):
            msg = (
                f"Invalid UPC-E number system for {value!r}: "
                f"Expected 0 or 1, got {number_system_digit!r}."
            )
            raise ParseError(msg)

        # Control that check digit is correct. The check digit is calculated
        # on the UPC-A expansion of the value, not on the UPC-E digits.
        upc_a_payload = _upc_e_to_upc_a_expansion(f"{payload}{check_digit}")[:-1]
        calculated_check_digit = gs1_standard_check_digit(upc_a_payload)
        if check_digit != calculated_check_digit:
            msg = (
                f"Invalid UPC-E check digit for {value!r}: "
                f"Expected {calculated_check_digit!r}, got {check_digit!r}."
            )
            raise ParseError(msg)

        return cls(
            value=value,
            format=UpcFormat.UPC_E,
            payload=payload,
            number_system_digit=number_system_digit,
            check_digit=check_digit,
        )

    def as_upc_a(self) -> str:
        """Format as UPC-A.

        A UPC-A value is returned as payload plus check digit. A UPC-E value
        is expanded to its UPC-A form.

        Returns:
            A string with the UPC encoded as UPC-A.

        References:
            GS1 General Specifications, section 5.2.2.4.2
        """
        if self.format == UpcFormat.UPC_A:
            return f"{self.payload}{self.check_digit}"

        if self.format == UpcFormat.UPC_E:
            return _upc_e_to_upc_a_expansion(f"{self.payload}{self.check_digit}")

        msg = (  # pragma: no cover
            "Unhandled case while formatting as UPC-A. This is a bug."
        )
        raise NotImplementedError(msg)  # pragma: no cover

    def as_upc_e(self) -> str:
        """Format as UPC-E.

        A UPC-E value is returned as payload plus check digit, i.e. always in
        its 8-digit form. A UPC-A value is converted by zero suppression.

        Returns:
            A string with the UPC encoded as UPC-E, if possible.

        Raises:
            EncodeError: If encoding as UPC-E fails.

        References:
            GS1 General Specifications, section 5.2.2.4.1
        """
        if self.format == UpcFormat.UPC_A:
            return _upc_a_to_upc_e_suppression(f"{self.payload}{self.check_digit}")

        if self.format == UpcFormat.UPC_E:
            return f"{self.payload}{self.check_digit}"

        msg = (  # pragma: no cover
            "Unhandled case while formatting as UPC-E. This is a bug."
        )
        raise NotImplementedError(msg)  # pragma: no cover

    def as_gtin_12(self) -> str:
        """Format as GTIN-12.

        Returns:
            The UPC-A form of the value, parsed as a GTIN and formatted as
            GTIN-12.
        """
        # Imported locally rather than at module level; presumably to avoid a
        # circular import with `biip.gtin` -- confirm before moving it.
        from biip.gtin import Gtin

        return Gtin.parse(self.as_upc_a()).as_gtin_12()

    def as_gtin_13(self) -> str:
        """Format as GTIN-13.

        Returns:
            The UPC-A form of the value, parsed as a GTIN and formatted as
            GTIN-13.
        """
        # Imported locally rather than at module level; presumably to avoid a
        # circular import with `biip.gtin` -- confirm before moving it.
        from biip.gtin import Gtin

        return Gtin.parse(self.as_upc_a()).as_gtin_13()

    def as_gtin_14(self) -> str:
        """Format as GTIN-14.

        Returns:
            The UPC-A form of the value, parsed as a GTIN and formatted as
            GTIN-14.
        """
        # Imported locally rather than at module level; presumably to avoid a
        # circular import with `biip.gtin` -- confirm before moving it.
        from biip.gtin import Gtin

        return Gtin.parse(self.as_upc_a()).as_gtin_14()
|
check_digit
class-attribute
instance-attribute
check_digit: int | None = None
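Check digit used to check if the UPC as a whole is valid.
Set by parse() for both UPC-A and UPC-E. For UPC-E values given without a
check digit (6 or 7 digits), it is calculated from the expanded UPC-A payload.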
UPC format, either UPC-A or UPC-E.
number_system_digit
instance-attribute
payload
instance-attribute
The actual payload.
Including number system digit, manufacturer code, and product code. Excludes
the check digit.
as_gtin_12
Format as GTIN-12.
Source code in src/biip/upc.py
def as_gtin_12(self) -> str:
    """Format as GTIN-12.

    Returns:
        The UPC-A form of the value, parsed as a GTIN and formatted as
        GTIN-12.
    """
    from biip.gtin import Gtin

    gtin = Gtin.parse(self.as_upc_a())
    return gtin.as_gtin_12()
|
as_gtin_13
Format as GTIN-13.
Source code in src/biip/upc.py
def as_gtin_13(self) -> str:
    """Format as GTIN-13.

    Returns:
        The UPC-A form of the value, parsed as a GTIN and formatted as
        GTIN-13.
    """
    from biip.gtin import Gtin

    gtin = Gtin.parse(self.as_upc_a())
    return gtin.as_gtin_13()
|
as_gtin_14
Format as GTIN-14.
Source code in src/biip/upc.py
def as_gtin_14(self) -> str:
    """Format as GTIN-14.

    Returns:
        The UPC-A form of the value, parsed as a GTIN and formatted as
        GTIN-14.
    """
    from biip.gtin import Gtin

    gtin = Gtin.parse(self.as_upc_a())
    return gtin.as_gtin_14()
|
as_upc_a
Format as UPC-A.
Returns:
-
str
–
A string with the UPC encoded as UPC-A.
References
GS1 General Specifications, section 5.2.2.4.2
Source code in src/biip/upc.py
def as_upc_a(self) -> str:
    """Format as UPC-A.

    A UPC-A value is returned as payload plus check digit. A UPC-E value is
    expanded to its UPC-A form.

    Returns:
        A string with the UPC encoded as UPC-A.

    References:
        GS1 General Specifications, section 5.2.2.4.2
    """
    # Payload followed by check digit, in whatever format this value has.
    full_value = f"{self.payload}{self.check_digit}"

    if self.format == UpcFormat.UPC_A:
        return full_value

    if self.format == UpcFormat.UPC_E:
        return _upc_e_to_upc_a_expansion(full_value)

    msg = (  # pragma: no cover
        "Unhandled case while formatting as UPC-A. This is a bug."
    )
    raise NotImplementedError(msg)  # pragma: no cover
|
as_upc_e
Format as UPC-E.
Returns:
-
str
–
A string with the UPC encoded as UPC-E, if possible.
Raises:
-
EncodeError
–
If encoding as UPC-E fails.
References
GS1 General Specifications, section 5.2.2.4.1
Source code in src/biip/upc.py
def as_upc_e(self) -> str:
    """Format as UPC-E.

    A UPC-E value is returned as payload plus check digit. A UPC-A value is
    converted to UPC-E by zero suppression.

    Returns:
        A string with the UPC encoded as UPC-E, if possible.

    Raises:
        EncodeError: If encoding as UPC-E fails.

    References:
        GS1 General Specifications, section 5.2.2.4.1
    """
    # Payload followed by check digit, in whatever format this value has.
    full_value = f"{self.payload}{self.check_digit}"

    if self.format == UpcFormat.UPC_A:
        return _upc_a_to_upc_e_suppression(full_value)

    if self.format == UpcFormat.UPC_E:
        return full_value

    msg = (  # pragma: no cover
        "Unhandled case while formatting as UPC-E. This is a bug."
    )
    raise NotImplementedError(msg)  # pragma: no cover
|
parse
classmethod
parse(
value: str, *, config: ParseConfig | None = None
) -> Upc
Parse the given value into a Upc
object.
The checksum is guaranteed to be valid if an UPC object is returned.
Parameters:
-
value
(str
)
–
The value to parse.
-
config
(ParseConfig | None
, default:
None
)
–
Configuration options for parsing.
Returns:
-
Upc
–
UPC data structure with the successfully extracted data.
Raises:
-
ParseError
–
If the parsing fails.
Source code in src/biip/upc.py
@classmethod
def parse(
    cls,
    value: str,
    *,
    config: ParseConfig | None = None,  # noqa: ARG003
) -> Upc:
    """Parse the given value into a [`Upc`][biip.upc.Upc] object.

    Surrounding whitespace is stripped before parsing. Values of 12 digits
    are parsed as UPC-A, and values of 6, 7, or 8 digits as UPC-E.

    The checksum is guaranteed to be valid if an UPC object is returned.

    Args:
        value: The value to parse.
        config: Configuration options for parsing. Accepted for interface
            compatibility; not used by this parser.

    Returns:
        UPC data structure with the successfully extracted data.

    Raises:
        ParseError: If the parsing fails.
    """
    value = value.strip()
    length = len(value)
    if length not in (6, 7, 8, 12):
        msg = (
            f"Failed to parse {value!r} as UPC: "
            f"Expected 6, 7, 8, or 12 digits, got {length}."
        )
        raise ParseError(msg)

    # `str.isdecimal()` alone also accepts non-ASCII decimal digits (e.g.
    # Arabic-Indic digits). Those are not valid UPC digits and would leak
    # into the formatted output, so require ASCII as well.
    if not (value.isascii() and value.isdecimal()):
        msg = f"Failed to parse {value!r} as UPC: Expected a numerical value."
        raise ParseError(msg)

    if length == 12:
        return cls._parse_upc_a(value)

    if length in (6, 7, 8):
        return cls._parse_upc_e(value)

    msg = "Unhandled UPC length. This is a bug."  # pragma: no cover
    raise NotImplementedError(msg)  # pragma: no cover
|
Bases: Enum
Enum of UPC formats.
Source code in src/biip/upc.py
class UpcFormat(Enum):
    """Enum of UPC formats."""

    UPC_A = "upc_a"
    """UPC-A"""

    UPC_E = "upc_e"
    """UPC-E"""

    def __repr__(self) -> str:
        """Canonical string representation of format.

        Returns:
            The class name and member name joined by a dot, e.g.
            `UpcFormat.UPC_A`, instead of the default `<UpcFormat.UPC_A: 'upc_a'>`.
        """
        member_name = self.name
        return f"{type(self).__name__}.{member_name}"
|