Separate configuration & patterns from code

2 years ago · 208c3ed5ad
parent e4fe661e77
commit 208c3ed5ad
1 changed files with 74 additions and 54 deletions
--- a/maintain.py
+++ b/maintain.py
@ -17,6 +17,10 @@ from typing import Callable, Iterable, List, Mapping

 warn = partial(print, file=sys.stderr)

+
+# === Configuration
+
+
 INDEX_DIR = ".index"
 DEFAULT_CATEGORY = "_toSort"
 OCR_LANGS = [
@ -27,6 +31,71 @@ DEFAULT_SCAN_SOURCE = "ADF Duplex"
 ALTERNATE_SCAN_SOURCE = "Flatbed"
 MIN_NUM_WIDTH = 6 # only used for INDEX_DIR files

+ID_AROUND_RANGE = 10
+
+
+# === Patterns
+
+
+SCAN_SUFFIXES = [ # Regexes
+    "jpe?g",
+    "pdf",
+    "png",
+]
+
+ID_REGEX = re.compile(r"""^
+    (
+        (?P<digital>d(igital)?) # no physical original
+        |
+        (
+            (?P<id_simple>\d+) # simple id
+            |
+            (?P<id_following>\d+)\+ # id and following id
+            |
+            (?P<id_following_twice>\d+)\+\+ # id and following 3 ids (this and following document with each 2 sides)
+            |
+            (?P<id_range_begin>\d+)-(?P<id_range_end>\d+) # id range
+        )(?P<around>\#)?
+    )
+$""", re.VERBOSE)
+
+SCAN_REGEX = re.compile(r"""^
+    ( # Date
+        (?P<date>\d{4}-\d{2}-\d{2})_
+    )?
+    # automatic prefix of scanimage
+    (out)?
+    # scan id
+    (?P<scan_id>
+        """ + ID_REGEX.pattern[1:-1] + r"""
+    )
+    ( # Description (optional)
+        _(?P<description>.*)
+    )?
+    # Suffix
+    \.(""" + "|".join(SCAN_SUFFIXES) + r""")
+$""", re.VERBOSE)
+SCAN_WARN_REGEX = re.compile(r"\.(" + "|".join(SCAN_SUFFIXES) + r")$")
+
+NUMBER_REGEX = re.compile(r"^\d+$")
+CONTENT_SPLIT_REGEX = re.compile(r"[\W]")
+
+DATE_REGEX = re.compile(r"(\d{2,4}-\d{1,2}-\d{1,2}|\d{1,2}\.\d{1,2}\.\d{2,4}|\d{1,2}\.\s+[a-zA-Z]+\s+\d{2,4})")
+DATE_FORMATS = [  # date.strptime compatible
+    "%Y-%m-%d",
+    "%y-%m-%d",
+    "%d.%m.%Y",
+    "%d.%m.%y",
+    "%d. %B %Y",
+    "%d. %B %y",
+    "%d. %b %Y",
+    "%d. %b %y",
+]
+
+
+# === Code
+
+
 def build_args(args: Iterable) -> str:
    return " ".join((shlex.quote(str(e)) for e in args))

@ -54,22 +123,6 @@ def rlinput(prompt, prefill=None, suggestions=[]):
    finally:
        readline.set_startup_hook()

-ID_REGEX = re.compile(r"""^
-    (
-        (?P<digital>d(igital)?) # no physical original
-        |
-        (
-            (?P<id_simple>\d+) # simple id
-            |
-            (?P<id_following>\d+)\+ # id and following id
-            |
-            (?P<id_following_twice>\d+)\+\+ # id and following 3 ids (this and following document with each 2 sides)
-            |
-            (?P<id_range_begin>\d+)-(?P<id_range_end>\d+) # id range
-        )(?P<around>\#)?
-    )
-$""", re.VERBOSE)
-ID_AROUND_RANGE = 10

@dataclass(eq=True, order=True, frozen=True)
 class IdRange:
@ -155,44 +208,6 @@ class IdRange:
        return self.fancy


-SCAN_SUFFIXES = [ # Regexes
-    "jpe?g",
-    "pdf",
-    "png",
-]
-
-SCAN_REGEX = re.compile(r"""^
-    ( # Date
-        (?P<date>\d{4}-\d{2}-\d{2})_
-    )?
-    # automatic prefix of scanimage
-    (out)?
-    # scan id
-    (?P<scan_id>
-        """ + ID_REGEX.pattern[1:-1] + r"""
-    )
-    ( # Description (optional)
-        _(?P<description>.*)
-    )?
-    # Suffix
-    \.(""" + "|".join(SCAN_SUFFIXES) + r""")
-$""", re.VERBOSE)
-SCAN_WARN_REGEX = re.compile(r"\.(" + "|".join(SCAN_SUFFIXES) + r")$")
-
-NUMBER_REGEX = re.compile(r"^\d+$")
-CONTENT_SPLIT_REGEX = re.compile(r"[\W]")
-
-DATE_REGEX = re.compile(r"(\d{2,4}-\d{1,2}-\d{1,2}|\d{1,2}\.\d{1,2}\.\d{2,4}|\d{1,2}\.\s+[a-zA-Z]+\s+\d{2,4})")
-DATE_FORMATS = [
-    "%Y-%m-%d",
-    "%y-%m-%d",
-    "%d.%m.%Y",
-    "%d.%m.%y",
-    "%d. %B %Y",
-    "%d. %B %y",
-    "%d. %b %Y",
-    "%d. %b %y",
-]
 def interpret_date(text: str) -> datetime:
    for date_format in DATE_FORMATS:
        try:
@ -200,13 +215,16 @@ def interpret_date(text: str) -> datetime:
        except ValueError:
            continue
    return None
+
 def format_date(date: datetime) -> str:
    return date.strftime(DATE_FORMATS[0])
+
 def avg(dates: list[datetime]) -> datetime:
    m = min(dates)
    s = sum((date - m for date in dates), start=timedelta())
    return m + (s / len(dates))

+
@dataclass
 class ScanFile:
    path: Path
@ -333,6 +351,7 @@ class ScanFile:
    def __eq__(self, other):
        return self.path == other.path

+
 SCAN_FORMATS: dict[Callable[[ScanFile], str]] = {
    "content": lambda scan: scan.text_content,
    "date": lambda scan: scan.date_from_content,
@ -405,6 +424,7 @@ def extract_dates(scans: List[ScanFile]) -> List[str]:
            dates[format_date(date)] = None
    return list(dates)

+
 # args dependent

 def read_single_id(args):