Disappearance
From ActiveArchives
This script takes a subtitle file (.srt) as a parameter and blanks out every occurrence of each word except the first one, replacing repeated words with spaces of the same length.
Licence: GPL3
Updated version available as a repository
#! /usr/bin/env python

# Copyright 2010 the Active Archives contributors.
# See the file AUTHORS for more details.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Blank out every repeated word of a subtitle file.

Usage: disappearance.py in.srt > out.srt

The first occurrence of each word (compared case-insensitively) is kept;
every later occurrence is replaced by spaces of the same length, so the
layout of the text is otherwise left untouched.
"""

import sys
import re
from optparse import OptionParser
from textwrap import dedent


# Pattern used to find words. Note that it only matches runs of ASCII
# letters: digits, punctuation and accented characters are never part of
# a "word" and are therefore always copied through unchanged.
WORD_PATTERN = re.compile(r'[a-zA-Z]+', re.UNICODE)


def remove_repeated_words(text):
    """Return *text* with all but the first occurrence of each word blanked.

    Words are the matches of WORD_PATTERN and are compared
    case-insensitively. A repeated word is replaced by as many spaces as
    it has characters; everything between words is copied verbatim.
    """
    seen_words = set()  # lower-cased words already emitted once
    pieces = []         # output fragments, joined once at the end
    previous_end = 0

    for match in WORD_PATTERN.finditer(text):
        # Copy whatever sits between the previous word and this one.
        pieces.append(text[previous_end:match.start()])

        word = match.group()
        key = word.lower()
        if key in seen_words:
            pieces.append(" " * len(word))
        else:
            seen_words.add(key)
            pieces.append(word)

        previous_end = match.end()

    # Copy the tail that follows the last word (or the whole text when
    # it contains no word at all).
    pieces.append(text[previous_end:])
    return "".join(pieces)


def read_subtitles(path):
    """Return the content of the file at *path*.

    Exits the program with an error message if the file cannot be opened.
    """
    try:
        f = open(path, 'r')
    except IOError:
        sys.exit(dedent("""\
            Error: I can't open your subtitle file.
            Are you sure it exists?"""))
    # The context manager closes the file even if reading fails.
    with f:
        return f.read()


def main():
    """Parse the command line and print the processed subtitles."""
    usage = "usage: %prog in.srt > out.srt"
    parser = OptionParser(usage=usage)
    (options, args) = parser.parse_args()

    # Exactly one positional argument (the subtitle file) is expected.
    if len(args) != 1:
        parser.error("incorrect number of arguments")

    print(remove_repeated_words(read_subtitles(args[0])))


if __name__ == "__main__":
    main()