csv or not csv: writeup

csv or not csv


Category	Stegano
Difficulty	Easy
Encoding	QR code v3 (29 × 29) в CSV
Flag	`flag{b4sica1ly_a_png}`

Recon

Дан файл file.csv. Нужно найти в нём флаг.

Артефакты: file.csv.

head -5 file.csv

x,y,c
20,3,0
11,12,0
28,26,0
3,14,1

Ключевые наблюдения:

Три числовых столбца: x, y, c.
c принимает только значения 0 и 1 — бинарный.
Диапазон x и y: от 0 до 28 — итого $29 \times 29 = 841$ точка.
Строк в файле ровно 841 — каждая клетка сетки заполнена.

В файле нет категорий, текстовых меток, сумм — это не таблица для анализа. Это описание двумерной сетки из пикселей.

python3 -c "
import csv
pts = list(csv.DictReader(open('file.csv')))
xs = [int(r['x']) for r in pts]
ys = [int(r['y']) for r in pts]
print(f'x: {min(xs)}..{max(xs)}, y: {min(ys)}..{max(ys)}, rows: {len(pts)}')
"

x: 0..28, y: 0..28, rows: 841

CSV как растровое изображение

Стеганография — способ скрыть данные, замаскировав их под безобидный формат (здесь CSV вместо PNG).

Размер $29 \times 29$ — не случайный. QR-коды имеют фиксированные версии:

Версия	Размер
1	21 × 21
2	25 × 25
3	29 × 29
4	33 × 33

Сторона каждой следующей версии на 4 модуля длиннее предыдущей: у версии $v$ она равна $21 + 4 \cdot (v-1)$ модулям.

Идея решения: каждая строка CSV говорит «в точке $(x, y)$ поставь модуль цвета $c$». Значение 1 — чёрный модуль QR, 0 — белый. Собираем все точки в матрицу, отрисовываем — получаем QR-код.

Два важных момента при построении:

Порядок осей. В массивах изображений принято image[row][column], то есть image[y][x] — не наоборот. Перепутать — классическая ошибка.
Белая рамка (quiet zone). Стандарт QR требует пустую белую зону вокруг кода. Без неё декодер может не распознать поисковые узоры (finder patterns) в углах кода.

Decryption

Шаг	Действие	Детали
1	Считать CSV	Список троек `(x, y, c)`
2	Построить матрицу	`matrix[y][x] = c`, размер `29 × 29`
3	Перевести в изображение	`c=1` → чёрный (0), `c=0` → белый (255)
4	Добавить белую рамку	Quiet zone шириной 1 модуль
5	Масштабировать × 20	Декодеру нужно достаточно пикселей
6	Декодировать QR	`cv2.QRCodeDetector().detectAndDecode(image)`

Сработавший вариант: swap_axes=False, invert=False, rotation=0, border=1 — данные записаны почти напрямую, нужна была только рамка.

Automation

#!/usr/bin/env python3

from __future__ import annotations

import argparse
import csv
import sys
from pathlib import Path

try:
    import cv2
    import numpy as np
except ImportError as exc:
    missing = exc.name or "opencv-python-headless"
    print(
        "Missing dependency: "
        f"{missing}. Install it with 'pip install opencv-python-headless'.",
        file=sys.stderr,
    )
    raise SystemExit(1) from exc


def load_points(csv_path: Path) -> list[tuple[int, int, int]]:
    """Read ``(x, y, c)`` integer triples from a CSV file with an ``x,y,c`` header.

    The file is decoded as ``utf-8-sig``: plain ASCII input is read exactly as
    it would be with an ASCII codec, while a leading UTF-8 byte-order mark
    (added by some spreadsheet exports) is dropped instead of making the read
    fail.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        One ``(x, y, c)`` tuple per data row, in file order.

    Raises:
        KeyError: If one of the ``x``, ``y``, ``c`` columns is missing.
        TypeError: If a row is shorter than the header (the cell is ``None``).
        ValueError: If a cell is not an integer or the file is not valid UTF-8.
    """
    # newline="" leaves line-ending handling to the csv module.
    with csv_path.open(newline="", encoding="utf-8-sig") as handle:
        return [
            (int(row["x"]), int(row["y"]), int(row["c"]))
            for row in csv.DictReader(handle)
        ]


def build_matrix(points: list[tuple[int, int, int]]) -> np.ndarray:
    """Place ``(x, y, c)`` points on a square grid indexed as ``matrix[y, x]``.

    The grid side is the largest coordinate seen on either axis plus one.
    Cells that no point mentions stay 0; if a cell is listed more than once,
    the last value wins.

    Args:
        points: ``(x, y, c)`` triples with non-negative coordinates.

    Returns:
        A square ``uint8`` array holding ``c`` at row ``y``, column ``x``.

    Raises:
        ValueError: If ``points`` is empty or any coordinate is negative.
    """
    if not points:
        raise ValueError("No points to build a matrix from")

    # A negative index would not fail: NumPy would silently wrap it around to
    # the opposite edge of the grid, so reject it explicitly.
    lowest = min(min(x, y) for x, y, _ in points)
    if lowest < 0:
        raise ValueError(f"Negative coordinate in input: {lowest}")

    size = max(max(x, y) for x, y, _ in points) + 1
    matrix = np.zeros((size, size), dtype=np.uint8)
    for x, y, color in points:
        matrix[y, x] = color  # y selects the row, x the column
    return matrix


def render_variant(
    matrix: np.ndarray,
    *,
    swap_axes: bool,
    invert: bool,
    rotation: int,
    border: int,
    scale: int,
) -> np.ndarray:
    """Render a module matrix as an upscaled grayscale image for one variant.

    Args:
        matrix: 2-D ``uint8`` module grid (values are expected to be 0 or 1).
        swap_axes: Transpose the grid before rendering.
        invert: XOR every module with 1 before rendering.
        rotation: Number of 90-degree turns handed to ``np.rot90``.
        border: Width, in modules, of the white frame added on every side
            (0 disables the frame).
        scale: Integer factor by which each module is enlarged.

    Returns:
        A ``uint8`` image in which each module is a ``scale`` x ``scale`` square.
    """
    modules = (matrix.T if swap_axes else matrix).copy()

    # After the optional inversion, module 1 is drawn black (0), module 0 white (255).
    flipped = modules ^ invert
    pixels = ((1 - flipped) * 255).astype(np.uint8)
    pixels = np.rot90(pixels, rotation)

    if border:
        # Surround the code with a white frame of `border` modules.
        pixels = np.pad(pixels, border, mode="constant", constant_values=255)

    height, width = pixels.shape
    # Nearest-neighbour scaling: no smoothing, so module edges stay crisp.
    return cv2.resize(
        pixels,
        (width * scale, height * scale),
        interpolation=cv2.INTER_NEAREST,
    )


def decode_qr(matrix: np.ndarray) -> tuple[str, dict[str, int | bool], np.ndarray]:
    """Try rendering variants of ``matrix`` until OpenCV decodes a QR code.

    Variants combine axis swap, inversion, rotation (0-3 quarter turns) and
    white border width (0, 1, 2, 4 or 8 modules), each rendered at scale 20.

    Args:
        matrix: Module grid passed on to ``render_variant``.

    Returns:
        ``(decoded, meta, image)``: the decoded text, the ``swap_axes``,
        ``invert``, ``rotation`` and ``border`` values of the first variant
        that decoded, and that variant's rendered image.

    Raises:
        ValueError: If no variant produces a non-empty decoded string.
    """
    from itertools import product  # local import: only this function needs it

    detector = cv2.QRCodeDetector()

    # product() varies its last argument fastest, so the search order is the
    # same as four nested loops: axes, inversion, rotation, border width.
    variants = product((False, True), (False, True), range(4), (0, 1, 2, 4, 8))
    for swap_axes, invert, rotation, border in variants:
        image = render_variant(
            matrix,
            swap_axes=swap_axes,
            invert=invert,
            rotation=rotation,
            border=border,
            scale=20,
        )
        # Only the decoded text matters; the other two return values are unused.
        decoded, _, _ = detector.detectAndDecode(image)
        if decoded:
            meta = {
                "swap_axes": swap_axes,
                "invert": invert,
                "rotation": rotation,
                "border": border,
            }
            return decoded, meta, image

    raise ValueError("QR code could not be decoded from the CSV data")


def parse_args() -> argparse.Namespace:
    """Parse the command line: an optional CSV path and an optional ``--save-image``.

    Returns:
        Namespace with ``csv_file`` (defaults to ``"file.csv"``) and
        ``save_image`` (``None`` unless the option is given).
    """
    cli = argparse.ArgumentParser(
        description="Rebuild a QR code from x,y,c CSV data and decode the hidden text."
    )

    # (names, options) pairs, registered in declaration order.
    arguments = (
        (
            ("csv_file",),
            {
                "nargs": "?",
                "default": "file.csv",
                "help": "Path to the CSV file with x,y,c columns",
            },
        ),
        (
            ("--save-image",),
            {
                "metavar": "PATH",
                "help": "Optional path for saving the decoded QR image as PNG",
            },
        ),
    )
    for names, options in arguments:
        cli.add_argument(*names, **options)

    return cli.parse_args()


def main() -> int:
    """Decode the QR code stored in the CSV file and print the result.

    Returns:
        0 on success; 1 when the input file is missing, its contents cannot be
        parsed, no QR code can be decoded, or the image cannot be saved.
    """
    args = parse_args()
    csv_path = Path(args.csv_file)
    # is_file() also rejects directories, which exists() would let through.
    if not csv_path.is_file():
        print(f"File not found: {csv_path}", file=sys.stderr)
        return 1

    try:
        points = load_points(csv_path)
        matrix = build_matrix(points)
    except (KeyError, TypeError, ValueError) as exc:
        # Missing columns, short rows, non-integer cells or an empty file.
        print(f"Invalid CSV data in {csv_path}: {exc!r}", file=sys.stderr)
        return 1

    try:
        decoded, meta, image = decode_qr(matrix)
    except ValueError as exc:
        print(exc, file=sys.stderr)
        return 1

    print(f"Grid size: {matrix.shape[1]}x{matrix.shape[0]}")
    print(
        "Variant: "
        f"swap_axes={meta['swap_axes']} "
        f"invert={meta['invert']} "
        f"rotation={meta['rotation']} "
        f"border={meta['border']}"
    )
    print(f"Decoded text: {decoded}")

    if args.save_image:
        output_path = Path(args.save_image)
        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(str(output_path), image):
            print(f"Could not save image: {output_path}", file=sys.stderr)
            return 1
        print(f"Saved image: {output_path}")

    return 0


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())

pip install opencv-python-headless
python3 solve.py
# Grid size: 29x29
# Variant: swap_axes=False invert=False rotation=0 border=1
# Decoded text: flag{b4sica1ly_a_png}

Key Takeaways

Расширение файла не гарантирует формат. CSV — это просто текст с разделителями. Три числовых столбца x, y, c с бинарным c не несут табличного смысла — это пиксельная карта.
Размеры QR-кодов фиксированы. Версия 1 — 21×21, версия 2 — 25×25, версия 3 — 29×29. Квадратная бинарная сетка подходящего размера — первым делом проверяй QR.
Ловушка задачи: попытка читать CSV как данные. Участники открывали файл в Excel и считали среднее по столбцу c — и ничего не находили. Ключевой вопрос, который нужно задать ещё на Recon: «зачем здесь координаты x и y?»