[FFmpeg-devel] [PATCH v2 2/2] lavc/ccaption_dec: implement special and extended character sets

2016-02-13 Thread Aman Gupta
From: Aman Gupta 

Character sets are implemented as defined in
https://en.wikipedia.org/wiki/EIA-608#Characters
---
 libavcodec/ccaption_dec.c | 152 +-
 1 file changed, 149 insertions(+), 3 deletions(-)

diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
index 5fb2ec6..fc361b2 100644
--- a/libavcodec/ccaption_dec.c
+++ b/libavcodec/ccaption_dec.c
@@ -63,6 +63,116 @@ enum cc_font {
 CCFONT_UNDERLINED_ITALICS,
 };
 
+enum cc_charset {
+CCSET_BASIC_AMERICAN,
+CCSET_SPECIAL_AMERICAN,
+CCSET_EXTENDED_SPANISH_FRENCH_MISC,
+CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH,
+};
+
+static const char *charset_overrides[4][128] =
+{
+[CCSET_BASIC_AMERICAN] = {
+[0x27] = "’",
+[0x2a] = "á",
+[0x5c] = "é",
+[0x5e] = "í",
+[0x5f] = "ó",
+[0x60] = "ú",
+[0x7b] = "ç",
+[0x7c] = "÷",
+[0x7d] = "Ñ",
+[0x7e] = "ñ",
+[0x7f] = "\u2588"
+},
+[CCSET_SPECIAL_AMERICAN] = {
+[0x30] = "®",
+[0x31] = "°",
+[0x32] = "½",
+[0x33] = "¿",
+[0x34] = "™",
+[0x35] = "¢",
+[0x36] = "£",
+[0x37] = "♪",
+[0x38] = "à",
+[0x39] = "\u00A0",
+[0x3a] = "è",
+[0x3b] = "â",
+[0x3c] = "ê",
+[0x3d] = "î",
+[0x3e] = "ô",
+[0x3f] = "û",
+},
+[CCSET_EXTENDED_SPANISH_FRENCH_MISC] = {
+[0x20] = "Á",
+[0x21] = "É",
+[0x22] = "Ó",
+[0x23] = "Ú",
+[0x24] = "Ü",
+[0x25] = "ü",
+[0x26] = "´",
+[0x27] = "¡",
+[0x28] = "*",
+[0x29] = "‘",
+[0x2a] = "-",
+[0x2b] = "©",
+[0x2c] = "℠",
+[0x2d] = "·",
+[0x2e] = "“",
+[0x2f] = "”",
+[0x30] = "À",
+[0x31] = "Â",
+[0x32] = "Ç",
+[0x33] = "È",
+[0x34] = "Ê",
+[0x35] = "Ë",
+[0x36] = "ë",
+[0x37] = "Î",
+[0x38] = "Ï",
+[0x39] = "ï",
+[0x3a] = "Ô",
+[0x3b] = "Ù",
+[0x3c] = "ù",
+[0x3d] = "Û",
+[0x3e] = "«",
+[0x3f] = "»",
+},
+[CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH] = {
+[0x20] = "Ã",
+[0x21] = "ã",
+[0x22] = "Í",
+[0x23] = "Ì",
+[0x24] = "ì",
+[0x25] = "Ò",
+[0x26] = "ò",
+[0x27] = "Õ",
+[0x28] = "õ",
+[0x29] = "{",
+[0x2a] = "}",
+[0x2b] = "\\",
+[0x2c] = "^",
+[0x2d] = "_",
+[0x2e] = "|",
+[0x2f] = "~",
+[0x30] = "Ä",
+[0x31] = "ä",
+[0x32] = "Ö",
+[0x33] = "ö",
+[0x34] = "ß",
+[0x35] = "¥",
+[0x36] = "¤",
+[0x37] = "¦",
+[0x38] = "Å",
+[0x39] = "å",
+[0x3a] = "Ø",
+[0x3b] = "ø",
+[0x3c] = "┌",
+[0x3d] = "┐",
+[0x3e] = "└",
+[0x3f] = "┘",
+},
+};
+
 static const unsigned char pac2_attribs[32][3] = // Color, font, ident
 {
 { CCCOL_WHITE,   CCFONT_REGULAR,0 },  // 0x40 || 0x60
@@ -103,6 +213,7 @@ static const unsigned char pac2_attribs[32][3] = // Color, font, ident
 struct Screen {
 /* +1 is used to compensate null character of string */
 uint8_t characters[SCREEN_ROWS][SCREEN_COLUMNS+1];
+uint8_t charsets[SCREEN_ROWS][SCREEN_COLUMNS+1];
 uint8_t colors[SCREEN_ROWS][SCREEN_COLUMNS+1];
 uint8_t fonts[SCREEN_ROWS][SCREEN_COLUMNS+1];
 /*
@@ -123,6 +234,7 @@ typedef struct CCaptionSubContext {
 uint8_t cursor_column;
 uint8_t cursor_color;
 uint8_t cursor_font;
+uint8_t cursor_charset;
 AVBPrint buffer;
 int buffer_changed;
 int rollup;
@@ -189,6 +301,7 @@ static void flush_decoder(AVCodecContext *avctx)
 ctx->cursor_column = 0;
 ctx->cursor_font = 0;
 ctx->cursor_color = 0;
+ctx->cursor_charset = 0;
 ctx->active_screen = 0;
 ctx->last_real_time = 0;
 ctx->screen_touched = 0;
@@ -204,10 +317,13 @@ static int write_char(CCaptionSubContext *ctx, struct Screen *screen, char ch)
 uint8_t col = ctx->cursor_column;
 char *row = screen->characters[ctx->cursor_row];
 char *font = screen->fonts[ctx->cursor_row];
+char *charset = screen->charsets[ctx->cursor_row];
 
 if (col < SCREEN_COLUMNS) {
 row[col] = ch;
 font[col] = ctx->cursor_font;
+charset[col] = ctx->cursor_charset;
+ctx->cursor_charset = CCSET_BASIC_AMERICAN;
 if (ch) ctx->cursor_column++;
 return 0;
 }
@@ -306,6 +422,7 @@ static void roll_up(CCaptionSubContext *ctx)
 memcpy(screen->characters[i_row], screen->characters[i_row+1], SCREEN_COLUMNS);
 memcpy(screen->colors[i_row], screen->colors[i_row+1], SCREEN_COLUMNS);
 memcpy(screen->fonts[i_row], screen->fonts[i_row+1], SCREEN_COLUMNS);
+memcpy(screen->charsets[i_row], 

Re: [FFmpeg-devel] [PATCH v2 2/2] lavc/ccaption_dec: implement special and extended character sets

2016-02-13 Thread Clément Bœsch
On Sat, Feb 13, 2016 at 12:35:18PM -0800, Aman Gupta wrote:
[...]
> +static const char *charset_overrides[4][128] =
> +{
> +[CCSET_BASIC_AMERICAN] = {
> +[0x27] = "’",
> +[0x2a] = "á",
> +[0x5c] = "é",
> +[0x5e] = "í",
> +[0x5f] = "ó",
> +[0x60] = "ú",
> +[0x7b] = "ç",
> +[0x7c] = "÷",
> +[0x7d] = "Ñ",
> +[0x7e] = "ñ",
> +[0x7f] = "\u2588"
> +},
> +[CCSET_SPECIAL_AMERICAN] = {
> +[0x30] = "®",
> +[0x31] = "°",
> +[0x32] = "½",
> +[0x33] = "¿",
> +[0x34] = "™",
> +[0x35] = "¢",
> +[0x36] = "£",
> +[0x37] = "♪",
> +[0x38] = "à",
> +[0x39] = "\u00A0",
> +[0x3a] = "è",
> +[0x3b] = "â",
> +[0x3c] = "ê",
> +[0x3d] = "î",
> +[0x3e] = "ô",
> +[0x3f] = "û",
> +},
[...]

I'm not exactly comfortable with relying on an implicit encoding in the code
itself... I feel like character-encoding handling at some random level (the
sender's editor, the mail sender side, the mail receiver side, ...) could mess
up the encoding one way or another. Would you mind using the \u form everywhere?

[...]

-- 
Clément B.


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel