8.2 구조화된 텍스트 파일

  • 탭, 콤마, 수직바(|)와 같은 문자를 구분자로 사용한다. 여기서는 CSV를 다룬다.
  • 태그를 <>로 둘러싼다. XML, HTML을 다룬다.
  • 구두점을 사용한다. JSON을 다룬다.
  • 들여쓰기를 사용한다. YAML을 다룬다(YAML은 마크업 언어가 아니다).
  • 프로그램 설정 파일과 같은 여러 가지 형식을 사용한다.

    8.2.1 CSV

  • 어떤 것은 콤마 대신 수직바 나 탭 문자 사용
  • 어떤 것은 이스케이프 시퀀스를 사용.
    import csv
    # Rows to store: one [first, last] name pair per record.
    villains = [
        ['Doctor', 'No'],
        ['Rosa', 'Klebb'],
        ['Mister', 'Big'],
        ['Auric', 'Goldfinger'],
        ['Ernst', 'Blofeld'],
    ]

    # Write every row to the file 'villains' in CSV format.
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; without it, platforms that translate '\n'
    # on write produce an extra blank line between rows.
    with open('villains', 'wt', newline='') as f:
        csvout = csv.writer(f)
        csvout.writerows(villains)

    # Read the file back; each record comes back as a list of strings.
    with open('villains', 'rt', newline='') as f:
        cin = csv.reader(f)
        villains_2 = list(cin)

    villains_2
    

    [['Doctor', 'No'], ['Rosa', 'Klebb'], ['Mister', 'Big'], ['Auric', 'Goldfinger'], ['Ernst', 'Blofeld']]

    # Read the CSV file 'villains' again, this time mapping each record onto
    # the given column names so every row becomes a first/last mapping.
    # newline='' leaves line-ending handling to the csv module.
    with open('villains', 'rt', newline='') as f:
        cin = csv.DictReader(f, fieldnames=['first', 'last'])
        villains_3 = list(cin)

    print(villains_3)
    

    [OrderedDict([('first', 'Doctor'), ('last', 'No')]), OrderedDict([('first', 'Rosa'), ('last', 'Klebb')]), OrderedDict([('first', 'Mister'), ('last', 'Big')]), OrderedDict([('first', 'Auric'), ('last', 'Goldfinger')]), OrderedDict([('first', 'Ernst'), ('last', 'Blofeld')])]

    # Records to store, keyed by column name.
    villains = [
        {'first': 'Doctor', 'last': 'No'},
        {'first': 'Rosa', 'last': 'Klebb'},
        {'first': 'Mister', 'last': 'Big'},
        {'first': 'Auric', 'last': 'Goldfinger'},
        {'first': 'Ernst', 'last': 'Blofeld'},
    ]

    # Write a header line with the column names, then one line per record.
    # newline='' leaves line-ending handling to the csv module.
    with open('villains', 'wt', newline='') as f:
        cout = csv.DictWriter(f, ['first', 'last'])
        cout.writeheader()
        cout.writerows(villains)

    # No fieldnames are passed here, so DictReader takes the column names
    # from the header line that was just written.
    with open('villains', 'rt', newline='') as f:
        cin = csv.DictReader(f)
        villains = list(cin)

    villains
    

    [OrderedDict([('first', 'Doctor'), ('last', 'No')]), OrderedDict([('first', 'Rosa'), ('last', 'Klebb')]), OrderedDict([('first', 'Mister'), ('last', 'Big')]), OrderedDict([('first', 'Auric'), ('last', 'Goldfinger')]), OrderedDict([('first', 'Ernst'), ('last', 'Blofeld')])]

    8.2.2 XML

    8.2.4 JSON

    # The menu as a nested Python dict (not a block of text), so that
    # json.dumps() encodes its structure instead of merely quoting a string.
    menu = {
        "breakfast": {
            "hours": "7-11",
            "items": {
                "breakfast burritos": "$6.00",
                "pancakes": "$4.00",
            },
        },
        "lunch": {
            "hours": "11-3",
            "items": {
                "hamburger": "$5.00",
            },
        },
        "dinner": {
            "hours": "3-10",
            "items": {
                "spaghetti": "$8.00",
            },
        },
    }

    import json

    # Encode: Python dict -> JSON text (a str).
    menu_json = json.dumps(menu)
    print(menu_json)


    {"breakfast": {"hours": "7-11", "items": {"breakfast burritos": "$6.00", "pancakes": "$4.00"}}, "lunch": {"hours": "11-3", "items": {"hamburger": "$5.00"}}, "dinner": {"hours": "3-10", "items": {"spaghetti": "$8.00"}}}

    # Decode: JSON text -> a Python dict equal to the original menu.
    menu2 = json.loads(menu_json)
    menu2


    {'breakfast': {'hours': '7-11', 'items': {'breakfast burritos': '$6.00', 'pancakes': '$4.00'}}, 'lunch': {'hours': '11-3', 'items': {'hamburger': '$5.00'}}, 'dinner': {'hours': '3-10', 'items': {'spaghetti': '$8.00'}}}

    import datetime
    now = datetime.datetime.utcnow()
    
    now
    

    datetime.datetime(2018, 2, 22, 15, 19, 18, 662590)

    json.dumps(now)
    
TypeError Traceback (most recent call last)
<ipython-input-27-f164a08299e1> in <module>()
----> 1 json.dumps(now)

~/.pyenv/versions/3.6.4/lib/python3.6/json/__init__.py in dumps(obj,  skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent,  separators, default, sort_keys, **kw)
229         cls is None and indent is None and separators is None and
230         default is None and not sort_keys and not kw):
--> 231         return _default_encoder.encode(obj)
232     if cls is None:
233         cls = JSONEncoder
~/.pyenv/versions/3.6.4/lib/python3.6/json/encoder.py in encode(self, o)
197         # exceptions aren't as detailed.  The list call should be roughly
198         # equivalent to the PySequence_Fast that ''.join() would do.
--> 199         chunks = self.iterencode(o, _one_shot=True)
200         if not isinstance(chunks, (list, tuple)):
201             chunks = list(chunks)
~/.pyenv/versions/3.6.4/lib/python3.6/json/encoder.py in iterencode(self, o, _one_shot)
255                 self.key_separator, self.item_separator, self.sort_keys,
256                 self.skipkeys, _one_shot)
--> 257         return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
~/.pyenv/versions/3.6.4/lib/python3.6/json/encoder.py in default(self, o)
178         """
179         raise TypeError("Object of type '%s' is not JSON serializable" %
--> 180                         o.__class__.__name__)
181
182     def encode(self, o):
TypeError: Object of type 'datetime' is not JSON serializable
now_str = str(now)
json.dumps(now_str)

'"2018-02-22 15:19:18.662590"'

from time import mktime
# Convert the datetime to integer epoch seconds, which JSON can represent.
# NOTE(review): mktime() interprets the time tuple as local time, but `now`
# was created with utcnow(). On a machine whose zone is not UTC the result is
# shifted by the UTC offset: the value recorded below is 9 hours (32400 s)
# less than the true epoch for that instant, 1519312758.
now_epoch = int(mktime(now.timetuple()))
json.dumps(now_epoch)

'1519280358'

class DTEncode(json.JSONEncoder):
    """JSON encoder that writes datetime objects as integer epoch seconds.

    Pass it as ``cls=DTEncode`` to ``json.dumps``. Every other type is
    handled exactly as the stock ``JSONEncoder`` handles it.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        ``datetime.datetime`` values become whole seconds since the epoch.
        Timezone-aware values are converted using their own offset; naive
        values are taken to be local time. Anything else is delegated to
        the base class, which raises ``TypeError`` for unsupported types.
        """
        if isinstance(obj, datetime.datetime):
            # timestamp() honours tzinfo on aware datetimes and treats naive
            # ones as local time, which is what mktime(obj.timetuple()) did;
            # the mktime form silently ignored tzinfo.
            return int(obj.timestamp())
        return super().default(obj)
json.dumps(now, cls=DTEncode)

'1519280358'

type(now)

datetime.datetime

isinstance(now, datetime.datetime)

True

type(234)

int

isinstance(234, int)

True

8.2.5 YAML

8.2.6 보안노트

8.2.7 설정파일

8.2.8 기타 데이터 교환 형식

8.2.9 직렬화하기:pickle

import pickle
import datetime
# Round-trip a datetime through pickle: dumps() serializes the object to
# bytes and loads() rebuilds an object from those bytes. The recorded
# outputs for now1 and now2 are identical.
now1 = datetime.datetime.utcnow()
pickled = pickle.dumps(now1)
now2 = pickle.loads(pickled)
now1

datetime.datetime(2018, 2, 22, 15, 28, 28, 445235)

now2

datetime.datetime(2018, 2, 22, 15, 28, 28, 445235)

class Tiny:
    """Minimal user-defined class used to demonstrate pickling an instance."""

    def __str__(self):
        """Return the fixed text 'tiny'."""
        return 'tiny'
obj1 = Tiny()
obj1

<__main__.Tiny at 0x7faa94fa6f28>

str(obj1)

'tiny'

pickled = pickle.dumps(obj1)
pickled

b'\x80\x03c__main__\nTiny\nq\x00)\x81q\x01.'

obj2 = pickle.loads(pickled)
obj2

<__main__.Tiny at 0x7faa9c3f5be0>

str(obj2)

'tiny'

Comments