From 417c1c94c4fe01933d9cbdec3bf2d0ae5e45a6f7 Mon Sep 17 00:00:00 2001 From: Nick Hollinghurst Date: Fri, 28 Feb 2025 15:56:17 +0000 Subject: [PATCH] drm/rp1: DPI interlace: Improve precision of PIO-generated VSYNC Instead of trying to minimize the delay between seeing HSYNC edge and asserting VSYNC, try to predict the next HSYNC edge precisely. This eliminates the round-trip delay but introduces mode-dependent rounding error. HSYNC->VSYNC lag reduced from ~30ns to -5ns..+10ns (plus up to 5ns synchronization jitter as before). This may benefit e.g. SCART HATs, particularly those that generate Composite Sync using an XNOR gate. Signed-off-by: Nick Hollinghurst --- drivers/gpu/drm/rp1/rp1-dpi/rp1_dpi_pio.c | 118 ++++++++++++---------- 1 file changed, 67 insertions(+), 51 deletions(-) --- a/drivers/gpu/drm/rp1/rp1-dpi/rp1_dpi_pio.c +++ b/drivers/gpu/drm/rp1/rp1-dpi/rp1_dpi_pio.c @@ -29,15 +29,31 @@ #include /* - * Start a PIO SM to generate an interrupt just after HSYNC onset, then another - * after a fixed delay (during which we assume HSYNC will have been deasserted). + * Start a PIO SM to generate two interrupts for each horizontal line. + * The first occurs shortly before the middle of the line. The second + * is timed such that after receiving the IRQ plus 1 extra delay cycle, + * another SM's output will align with the next HSYNC within -5ns .. +10ns. + * To achieve this, we need an accurate measure of (cycles per line) / 2. + * + * Measured GPIO -> { wait gpio ; irq set | irq wait ; sideset } -> GPIO + * round-trip delay is about 8 cycles when pins are not heavily loaded. + * + * PIO code ; Notional time % 1000-cycle period + * -------- ; --------------------------------- + * 0: wait 1 gpio 3 ; 0.. 8 + * 1: mov x, y ; 8.. 
9 + * 2: jmp x--, 2 ; 9..499 (Y should be T/2 - 11) + * 3: irq set 1 ; 499..500 + * 4: mov x, y [8] ; 500..509 + * 5: jmp x--, 5 ; 509..999 + * 6: irq set 1 ; 999..1000 */ static int rp1dpi_pio_start_timer_both(struct rp1_dpi *dpi, u32 flags, u32 tc) { - static const u16 instructions[2][5] = { - { 0xa022, 0x2083, 0xc001, 0x0043, 0xc001 }, /* posedge */ - { 0xa022, 0x2003, 0xc001, 0x0043, 0xc001 }, /* negedge */ + static const u16 instructions[2][7] = { + { 0x2083, 0xa022, 0x0042, 0xc001, 0xa822, 0x0045, 0xc001 }, /* +H */ + { 0x2003, 0xa022, 0x0042, 0xc001, 0xa822, 0x0045, 0xc001 }, /* -H */ }; const struct pio_program prog = { .instructions = instructions[(flags & DRM_MODE_FLAG_NHSYNC) ? 1 : 0], @@ -51,16 +67,18 @@ static int rp1dpi_pio_start_timer_both(s return -EBUSY; offset = pio_add_program(dpi->pio, &prog); - if (offset == PIO_ORIGIN_ANY) + if (offset == PIO_ORIGIN_ANY) { + pio_sm_unclaim(dpi->pio, sm); return -EBUSY; + } pio_sm_config cfg = pio_get_default_sm_config(); pio_sm_set_enabled(dpi->pio, sm, false); - sm_config_set_wrap(&cfg, offset, offset + 4); + sm_config_set_wrap(&cfg, offset, offset + 6); pio_sm_init(dpi->pio, sm, offset, &cfg); - pio_sm_put(dpi->pio, sm, tc - 4); + pio_sm_put(dpi->pio, sm, tc - 11); pio_sm_exec(dpi->pio, sm, pio_encode_pull(false, false)); pio_sm_exec(dpi->pio, sm, pio_encode_out(pio_y, 32)); pio_sm_set_enabled(dpi->pio, sm, true); @@ -74,46 +92,36 @@ static int rp1dpi_pio_start_timer_both(s * suitable moment (which should be an odd number of half-lines since the * last active line), sample DE again to detect field phase. * - * This version assumes VFP length is within 2..129 half-lines for any field + * This version assumes VFP length is within 2..256 half-lines for any field * (one half-line delay is needed to sample DE; we always wait for the next - * half-line boundary to improve VSync start accuracy). + * half-line boundary to improve VSync start accuracy) and VBP in 1..255. 
*/ static int rp1dpi_pio_vsync_ilace(struct rp1_dpi *dpi, struct drm_display_mode const *mode) { - static const int wrap_target = 14; - static const int wrap = 26; u16 instructions[] = { /* This is mutable */ + // .wrap_target 0xa0e6, // 0: mov osr, isr side 0 ; top: rewind parameters 0x2081, // 1: wait 1 gpio, 1 side 0 ; main: while (!DE) wait; 0x2783, // 2: wait 1 gpio, 3 side 0 [7] ; do { @HSync 0xc041, // 3: irq clear 1 side 0 ; flush stale IRQs 0x20c1, // 4: wait 1 irq, 1 side 0 ; @midline - 0x00c1, // 5: jmp pin, 1 side 0 ; } while (DE) + 0x00c2, // 5: jmp pin, 2 side 0 ; } while (DE) 0x0007, // 6: jmp 7 side 0 ; - 0x6027, // 7: out x, 7 side 0 ; x = VFPlen - 2 - 0x000a, // 8: jmp 10 side 0 ; while (x--) { - 0x20c1, // 9: wait 1 irq, 1 side 0 ; @halfline - 0x0049, // 10: jmp x--, 9 side 0 ; } - 0x6021, // 11: out x, 1 side 0 ; test for aligned case - 0x003a, // 12: jmp !x, 26 side 0 ; if (!x) goto precise; - 0x20c1, // 13: wait 1 irq, 1 side 0 ; @halfline - // .wrap_target ; vsjoin: - 0xb722, // 14: mov x, y side 1 [7] ; VSYNC=1; x = VSyncLen - 0xd041, // 15: irq clear 1 side 1 ; VSYNC=1; flush stale IRQs - 0x30c1, // 16: wait 1 irq, 1 side 1 ; VSYNC=1; do { @halfline - 0x1050, // 17: jmp x--, 16 side 1 ; VSYNC=1; } while (x--) - 0x6028, // 18: out x, 8 side 0 ; VSYNC=0; x = VBPLen - 0x0015, // 19: jmp 21 side 0 ; while (x--) { - 0x20c1, // 20: wait 1 irq, 1 side 0 ; @halfline - 0x0054, // 21: jmp x--, 20 side 0 ; } - 0x00c0, // 22: jmp pin, 0 side 0 ; if (DE) reset phase - 0x0018, // 23: jmp 24 side 0 ; - 0x00e1, // 24: jmp !osre, 1 side 0 ; if (!phase) goto main - 0x0000, // 25: jmp 0 side 0 ; goto top - 0x2083, // 26: wait 1 gpio, 3 side 0 ; precise: @HSync - // .wrap ; goto vsjoin + 0x6028, // 7: out x, 8 side 0 ; x = VFPlen - 2 + 0x20c1, // 8: wait 1 irq, 1 side 0 ; do { @halfline + 0x0048, // 9: jmp x--, 8 side 0 ; } while (x--) + 0xb022, // 10: mov x, y side 1 ; VSYNC=1; x = VSyncLen + 0x30c1, // 11: wait 1 irq, 1 side 1 ; VSYNC=1; do { @halfline + 
0x104b, // 12: jmp x--, 11 side 1 ; VSYNC=1; } while (x--) + 0x6028, // 13: out x, 8 side 0 ; VSYNC=0; x = VBPLen - 1 + 0x20c1, // 14: wait 1 irq, 1 side 0 ; do { @halfline + 0x004e, // 15: jmp x--, 14 side 0 ; } while (x--) + 0x00c0, // 16: jmp pin, 0 side 0 ; if (DE) reset phase + 0x0012, // 17: jmp 18 side 0 ; + 0x00e1, // 18: jmp !osre, 1 side 0 ; if (!phase) goto main + // .wrap ; goto top }; struct pio_program prog = { .instructions = instructions, @@ -129,8 +137,16 @@ static int rp1dpi_pio_vsync_ilace(struct if (sm < 0) return -EBUSY; - /* Compute mid-line time constant and start the timer SM */ - tc = (mode->htotal * (u64)sysclk) / (u64)(2000u * mode->clock); + /* + * Compute half-line time constant (round uppish so that VSync should + * switch never > 5ns before DPICLK, while defeating roundoff errors) + * and start the timer SM. + */ + tc = (u32)clk_get_rate(dpi->clocks[RP1DPI_CLK_DPI]); + if (!tc) + tc = 1000u * mode->clock; + tc = ((u64)mode->htotal * (u64)sysclk + ((7ul * tc) >> 2)) / + (u64)(2ul * tc); if (rp1dpi_pio_start_timer_both(dpi, mode->flags, tc) < 0) { pio_sm_unclaim(dpi->pio, sm); return -EBUSY; @@ -141,15 +157,15 @@ static int rp1dpi_pio_vsync_ilace(struct if (dpi->de_inv) { instructions[1] ^= 0x0080; instructions[5] = 0x00c7; - instructions[6] = 0x0001; - instructions[22] = 0x00d8; - instructions[23] = 0x0000; + instructions[6] = 0x0002; + instructions[16] = 0x00d2; + instructions[17] = 0x0000; } - for (i = 0; i < ARRAY_SIZE(instructions); i++) { - if (mode->flags & DRM_MODE_FLAG_NVSYNC) + if (mode->flags & DRM_MODE_FLAG_NHSYNC) + instructions[2] ^= 0x0080; + if (mode->flags & DRM_MODE_FLAG_NVSYNC) { + for (i = 0; i < ARRAY_SIZE(instructions); i++) instructions[i] ^= 0x1000; - if ((mode->flags & DRM_MODE_FLAG_NHSYNC) && (instructions[i] & 0xe07f) == 0x2003) - instructions[i] ^= 0x0080; } offset = pio_add_program(dpi->pio, &prog); if (offset == PIO_ORIGIN_ANY) @@ -157,7 +173,7 @@ static int rp1dpi_pio_vsync_ilace(struct /* Configure pins 
and SM */ dpi->pio_stole_gpio2 = true; - sm_config_set_wrap(&cfg, offset + wrap_target, offset + wrap); + sm_config_set_wrap(&cfg, offset, offset + ARRAY_SIZE(instructions) - 1); sm_config_set_sideset(&cfg, 1, false, false); sm_config_set_sideset_pins(&cfg, 2); pio_gpio_init(dpi->pio, 2); @@ -168,17 +184,17 @@ static int rp1dpi_pio_vsync_ilace(struct /* Compute vertical times, remembering how we rounded vdisplay, vtotal */ vfp = mode->vsync_start - (mode->vdisplay & ~1); vbp = (mode->vtotal | 1) - mode->vsync_end; - if (vfp > 128) { - vbp += vfp - 128; - vfp = 128; + if (vfp > 256) { + vbp += vfp - 256; + vfp = 256; } else if (vfp < 3) { - vbp = (vbp > 3 - vfp) ? (vbp - 3 + vfp) : 0; + vbp = (vbp > 3 - vfp) ? (vbp - 3 + vfp) : 1; vfp = 3; } pio_sm_put(dpi->pio, sm, - (vfp - 2) + ((vfp & 1) << 7) + (vbp << 8) + - ((vfp - 3) << 16) + (((~vfp) & 1) << 23) + ((vbp + 1) << 24)); + (vfp - 2) + ((vbp - 1) << 8) + + ((vfp - 3) << 16) + (vbp << 24)); pio_sm_put(dpi->pio, sm, mode->vsync_end - mode->vsync_start - 1); pio_sm_exec(dpi->pio, sm, pio_encode_pull(false, false)); pio_sm_exec(dpi->pio, sm, pio_encode_out(pio_y, 32));