Logo Search packages:      
Sourcecode: khmerconverter version File versions

def modules::FontDataXML::FontData::XMLDataError::__init__ (   self  ) 

The constructor reads the XML file into class variables, but only if they are still empty.

Definition at line 70 of file FontDataXML.py.

00070                       :
        """ constructor reads the xml file into class variables """
        # only read if variables are empty
        if (not FontData.fontNames):
            self.readXML("fontdata.xml")

    def listFontTypes(self):
        """Return every distinct font type registered in FontData.fontNames, sorted.

        Example: ["abc family", "abc-zwsp", "baidok family", "limon family", ...]
        """
        # fontNames maps font name -> font type; the set drops repeated types
        distinctTypes = set(FontData.fontNames.values())
        return sorted(distinctTypes)

    def listFontNames(self):
        """Return a sorted list of all known font names ("Limon S1", "Baidok3c", ...).

        The names are the keys of FontData.fontNames.
        """
        # sorted() always builds a new list, so this does not depend on
        # dict.keys() returning a list that can be sorted in place
        return sorted(FontData.fontNames)

    def listFontNamesForType(self, fonttype):
        """Return a sorted list of the known font names that map to fonttype.

        The beautified spelling of the type itself is left out, so only the
        remaining names registered for that type are listed.
        """
        # items() works on every Python version; 'kind' avoids shadowing the
        # builtin type()
        return sorted(
            name
            for name, kind in FontData.fontNames.items()
            if kind == fonttype and beautify(kind) != name
        )
    
    def typeForFontname(self, fontname):
        """Return the font type registered for fontname.

        The name is normalised with beautify() before the lookup.

        Raises:
            FontNotFoundError: if the normalised name is not in FontData.fontNames.
        """
        name = beautify(fontname)
        if name not in FontData.fontNames:
            raise self.FontNotFoundError("Font: " + name + " is unknown.")
        return FontData.fontNames[name]

    def isConvertable(self, fontname):
        """Return True if typeForFontname() accepts fontname, else False.

        Any ordinary error raised by the lookup counts as "not convertable".
        """
        try:
            self.typeForFontname(fontname)
        except Exception:
            # deliberately broad (best-effort check), but unlike a bare
            # except this lets KeyboardInterrupt / SystemExit propagate
            return False
        return True
        
    def defaultFont(self, fonttype):
        """Return the default font name for fonttype.

        The name comes from the "default" attribute of the font element stored
        in FontData.fontElements. If the type is unknown or the attribute is
        empty, fonttype itself is returned.
        """
        if fonttype not in FontData.fontElements:
            return fonttype
        element = FontData.fontElements[fonttype]
        # an empty attribute value falls back to the type name
        return element.getAttribute("default") or fonttype

    def unicodeData(self, fontname):
        """Return the unicode conversion data for the type of fontname.

        The data is read from the DOM on first use and cached in
        FontData.unicodeFontData; it is the (unicodeDicts, unicodeTable) pair
        stored by __readUnicodeData.

        Raises:
            FontNotFoundError: propagated unchanged from typeForFontname().
        """
        fonttype = self.typeForFontname(fontname)

        # read if data not available
        if fonttype not in FontData.unicodeFontData:
            self.__readUnicodeData(fonttype)

        return FontData.unicodeFontData[fonttype]

    def legacyData(self, fontname):
        """Return the legacy conversion data for the type of fontname.

        The data is read from the DOM on first use and cached in
        FontData.legacyFontData; it is the [legacyDict, legacyTable] list
        stored by __readLegacyData.

        Raises:
            FontNotFoundError: propagated unchanged from typeForFontname().
        """
        fonttype = self.typeForFontname(fontname)

        # read if data not available
        if fonttype not in FontData.legacyFontData:
            self.__readLegacyData(fonttype)

        return FontData.legacyFontData[fonttype]

    # List and Check Encoding
    # Encoding names accepted by canDecode(), which lower-cases its argument
    # before looking it up here, so every entry is spelled in lower case.
    encodingData = ["cp1252", "utf-8", "latin-1", "iso-8859-1"]
    
    def listEncodingTypes(self):
        """Return the human-readable encoding labels meant for display."""
        labels = [
            "Plain Text (cp1252)",
            "Plain Text (latin-1/iso-8859-1)",
            "Unicode (utf-8)",
        ]
        return labels

    def canDecode(self, encoding):
        """Tell whether encoding, compared case-insensitively, is listed in encodingData."""
        wanted = encoding.lower()
        supported = self.encodingData
        return wanted in supported

    # convert from other encoding to cp1252
    def changeEncoding(self, sin, encoding):
        """Re-encode sin from encoding to cp1252 and return the result.

        sin is returned unchanged when encoding is not accepted by canDecode()
        or already names cp1252 (in any letter case, matching canDecode()).

        Raises:
            TypeError: "Codecs Error" if the text cannot be encoded as cp1252.
        """
        # canDecode() is case-insensitive, so the cp1252 short-circuit must be
        # too; otherwise "CP1252" would trigger a pointless round trip
        if self.canDecode(encoding) and encoding.lower() != 'cp1252':
            try:
                sin = sin.decode(encoding).encode('cp1252')
            except UnicodeEncodeError:
                # NOTE(review): a UnicodeDecodeError from decode() is not
                # translated and reaches the caller as-is (unchanged behavior)
                raise TypeError("Codecs Error")
        return sin
        
    def __decodeLegacy(self, attribute):
        """Convert a legacy attribute from its numeric notation to a string.

        attribute is split on LEGSEP; every non-empty piece is evaluated to a
        character code and turned into one character with chr(). Empty pieces
        (for example from a trailing separator) are skipped.
        """
        # SECURITY NOTE(review): eval() runs each piece as a Python expression.
        # That is only acceptable while the font data file is trusted; consider
        # int(piece, 0) once the notation used in the data file is confirmed.
        return ''.join(chr(eval(piece)) for piece in attribute.split(LEGSEP) if len(piece) > 0)

    def readXML(self, filename):
        """Parse the font description file and rebuild the class-level tables.

        filename is opened as given first and, if that fails, relative to the
        'modules/' directory. After parsing, these FontData attributes are
        reset and filled from the <font> elements:

            dom            -- the parsed document
            fontNames      -- beautified font/alias name -> font type
            fontElements   -- font type -> its <font> element
            parents        -- font type -> type it inherits from
            legacyFontData / unicodeFontData -- emptied; filled lazily later

        Fonts marked hidden="true" are kept in fontElements but get no entry
        in fontNames.

        Raises:
            IOError: if the file cannot be opened at either location.
            XMLDataError: if no font is defined, a font type is defined twice,
                or a font inherits from a type that was not defined before it.
        """
        try:
            datasource = open(filename)
        except IOError:
            try:
                datasource = open('modules/' + filename)
            except IOError:
                raise IOError('Cannot open ' + filename + ' for reading!')

        # release the file handle even when parsing fails; the document is
        # queried below through the returned DOM, not through the file
        try:
            FontData.dom = parse(datasource)
        finally:
            datasource.close()

        FontData.fontNames = dict()
        FontData.fontElements = dict()
        FontData.legacyFontData = dict()
        FontData.unicodeFontData = dict()
        FontData.parents = dict()

        fonts = FontData.dom.getElementsByTagName("font")
        if len(fonts) == 0:
            raise self.XMLDataError("no Fonts found in " + filename)

        for font in fonts:
            fonttype = font.getAttribute("type").lower()
            if fonttype in FontData.fontElements:
                raise self.XMLDataError("Font: " + fonttype + " is defined twice in " + filename)

            inherit = font.getAttribute("inherit").lower()
            if inherit:
                # a parent must have been defined earlier in the file
                if inherit not in FontData.fontElements:
                    raise self.XMLDataError("Font " + fonttype + " can not inherit unkown font " + inherit + " in " + filename)
                # map font to parent
                FontData.parents[fonttype] = inherit

            # map name to element
            FontData.fontElements[fonttype] = font

            if font.getAttribute("hidden").lower() == 'true':
                # hidden fonts stay loadable by type but are not listed by name
                continue

            # add default fonttype to known fontnames
            FontData.fontNames[beautify(fonttype)] = fonttype
            # add alias names
            for alias in font.getElementsByTagName("alias"):
                FontData.fontNames[beautify(alias.getAttribute("name"))] = fonttype
                    
    def __readUnicodeData(self, fonttype):
        """Read the unicode data for one font type from the DOM tree.

        If the type inherits from another one, the parent's data is loaded
        first (recursively) and copied, so entries read for this type never
        modify the parent's cached data. The result is stored in
        FontData.unicodeFontData[fonttype] as (unicodeDicts, unicodeTable).

        Raises:
            FontNotFoundError: if fonttype is not in FontData.fontElements.
            XMLDataError: propagated from the map readers on duplicate entries.
        """
        if fonttype not in FontData.fontElements:
            raise self.FontNotFoundError("Font: " + fonttype + " is unknown.")
        font = FontData.fontElements[fonttype]

        # check and resolve inheritance
        if fonttype in FontData.parents:
            parent = FontData.parents[fonttype]
            # do we need to load the data?
            if parent not in FontData.unicodeFontData:
                self.__readUnicodeData(parent)

            # copy variables from parent
            parentDicts, parentTable = FontData.unicodeFontData[parent]
            unicodeDicts = [d.copy() for d in parentDicts]
            unicodeTable = list(parentTable)
        else:
            # init variables: no dicts yet, and an empty slot per table entry
            unicodeDicts = list()
            unicodeTable = [""] * MAXUNI

        maps = font.getElementsByTagName("maps")
        if len(maps) > 0:
            # only the first <maps> element is used
            self.__readGlobalUni(maps[0], unicodeTable, unicodeDicts)
            self.__readFromUnicode(maps[0], unicodeDicts)

        FontData.unicodeFontData[fonttype] = (unicodeDicts, unicodeTable)

    def __readLegacyData(self, fonttype):
        """Read the legacy data for one font type from the DOM tree.

        If the type inherits from another one, the parent's data is loaded
        first (recursively) and copied, so entries read for this type never
        modify the parent's cached data. The result is stored in
        FontData.legacyFontData[fonttype] as [legacyDict, legacyTable].

        Raises:
            FontNotFoundError: if fonttype is not in FontData.fontElements.
            XMLDataError: propagated from the map readers on duplicate entries.
        """
        if fonttype not in FontData.fontElements:
            raise self.FontNotFoundError("Font: " + fonttype + " is unknown.")
        font = FontData.fontElements[fonttype]

        # check and resolve inheritance
        if fonttype in FontData.parents:
            parent = FontData.parents[fonttype]
            # do we need to load the data?
            if parent not in FontData.legacyFontData:
                self.__readLegacyData(parent)

            # copy variables from parent
            parentData = FontData.legacyFontData[parent]
            legacyDict = parentData[0].copy()
            legacyTable = list(parentData[1])
        else:
            # init variables: every table slot starts as the identity mapping,
            # which __readGlobal uses to recognise slots not yet overridden
            legacyDict = dict()
            legacyTable = [unichr(i) for i in range(MAXLEG)]

        maps = font.getElementsByTagName("maps")
        if len(maps) > 0:
            # only the first <maps> element is used
            self.__readGlobal(maps[0], legacyTable, legacyDict)
            self.__readToUnicode(maps[0], legacyDict)

        FontData.legacyFontData[fonttype] = [legacyDict, legacyTable]

    def __readToUnicode(self, element, legacyDict):
        """Read the legacy -> unicode replacements of the first <tounicode> element.

        Every <map> child whose decoded legacy string is non-empty and shorter
        than MAXLENGTH is added to legacyDict (legacy string -> unicode string).
        Nothing happens when element has no <tounicode> child.

        Raises:
            XMLDataError: if a legacy string is already present in legacyDict.
        """
        sections = element.getElementsByTagName("tounicode")
        if len(sections) < 1:
            return

        for entry in sections[0].getElementsByTagName("map"):
            uniText = entry.getAttribute("unicode")
            legacy = self.__decodeLegacy(entry.getAttribute("legacy").encode("cp1252"))
            if 0 < len(legacy) < MAXLENGTH:
                if legacy in legacyDict:
                    raise self.XMLDataError("Legacy character " + legacy + " defined twice in toUnicode.")
                legacyDict[legacy] = uniText

    def __readFromUnicode(self, element, unicodeDicts):
        """Load the unicode -> legacy replacements of the first <fromunicode> element.

        Each <map> child whose unicode string is non-empty and shorter than
        MAXLENGTH is handed to __addToUniData together with its decoded legacy
        string. Does nothing when element has no <fromunicode> child.
        """
        sections = element.getElementsByTagName("fromunicode")
        if len(sections) == 0:
            return

        firstSection = sections[0]
        for entry in firstSection.getElementsByTagName("map"):
            uniText = entry.getAttribute("unicode")
            legacyText = self.__decodeLegacy(entry.getAttribute("legacy"))
            if 0 < len(uniText) < MAXLENGTH:
                self.__addToUniData(uniText, legacyText, unicodeDicts)


    def __readGlobalUni(self, element, unicodeTable, unicodeDicts):
        """Read the global replacements for unicode from the first <global> element.

        A single character whose offset from U+1780 lies inside unicodeTable
        is stored in that table slot. Every other unicode string shorter than
        MAXLENGTH goes into unicodeDicts via __addToUniData. Nothing happens
        when element has no <global> child.

        Raises:
            XMLDataError: if a table slot is already filled, or (from
                __addToUniData) if a string is already in unicodeDicts.
        """
        sections = element.getElementsByTagName("global")
        if len(sections) < 1:
            return

        for entry in sections[0].getElementsByTagName("map"):
            uniText = entry.getAttribute("unicode")
            legacy = self.__decodeLegacy(entry.getAttribute("legacy"))
            size = len(uniText)
            if size == 1:
                code = ord(uniText)
                # the table is indexed by offset from U+1780
                i = code - 0x1780
                if 0 <= i < MAXUNI:
                    if unicodeTable[i] != "":
                        raise self.XMLDataError("Unicode character " + hex(code) + " defined twice in global.")
                    unicodeTable[i] = legacy
                else:
                    # single characters outside the table go to the dicts
                    self.__addToUniData(uniText, legacy, unicodeDicts)
            elif 1 < size < MAXLENGTH:
                self.__addToUniData(uniText, legacy, unicodeDicts)

    def __readGlobal(self, element, legacyTable, legacyDict):
        """Read the global replacements for legacy from the first <global> element.

        A one-character legacy string whose code is below MAXLEG is stored in
        legacyTable at that code. Longer legacy strings (shorter than
        MAXLENGTH) are stored in legacyDict. Nothing happens when element has
        no <global> child.

        Raises:
            XMLDataError: if a table slot or dict key is already defined.
        """
        sections = element.getElementsByTagName("global")
        if len(sections) < 1:
            return

        for entry in sections[0].getElementsByTagName("map"):
            legacy = self.__decodeLegacy(entry.getAttribute("legacy").encode("cp1252"))
            uniText = entry.getAttribute("unicode")
            size = len(legacy)
            if size == 1:
                i = ord(legacy)
                # NOTE(review): a single character with code >= MAXLEG is
                # silently ignored here (unchanged behavior)
                if 0 <= i < MAXLEG:
                    # a slot still holding its own character has not been
                    # overridden yet
                    if legacyTable[i] != unichr(i):
                        raise self.XMLDataError("Legacy character " + hex(i) + " defined twice in global.")
                    legacyTable[i] = uniText
            elif 0 < size < MAXLENGTH:
                if legacy in legacyDict:
                    raise self.XMLDataError("Legacy character " + legacy + " defined twice in global.")
                legacyDict[legacy] = uniText


    def __addToUniData(self, unicode, legacy, data):
        """Put the unicode -> legacy mapping into the dict for its key length.

        data is a list of dicts in which data[k - 1] holds the keys of length
        k; it is extended with empty dicts as needed. Strings that are empty
        or not shorter than MAXLENGTH are ignored.

        Raises:
            XMLDataError: if unicode is already present in its dict.
        """
        size = len(unicode)
        # sanity check
        if not (0 < size < MAXLENGTH):
            return

        # make sure we have enough dicts
        while len(data) < size:
            data.append(dict())

        bucket = data[size - 1]
        if unicode in bucket:
            raise self.XMLDataError("Unicode string " + unicode + " already in datastructure.")
        bucket[unicode] = legacy


# testing

class TestFontData(unittest.TestCase):


Generated by  Doxygen 1.6.0   Back to index