#! venv/bin/python import psycopg2 import psycopg2.extras import pathlib import os.path import datetime import pypandoc from bs4 import BeautifulSoup from datetime import timedelta, datetime, tzinfo #from langdetect import detect import sys import re conn = psycopg2.connect(database="cccms_dev", user="postgres", password="", host="127.0.0.1") cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) cursor.execute("select * from page_translations") page_translations = cursor.fetchall() fo = open("orig.txt", "w") fm = open("modi.txt", "w") for translation in page_translations: body = translation.get('body') cursor.execute("select * from pages where id = %s", [translation['page_id']]) page = cursor.fetchone() if not body: continue if not '