JSON与XML处理:数据交换格式完全指南
JSON与XML处理:数据交换格式完全指南
数据交换格式是不同系统之间传递数据的桥梁。JSON、XML和YAML是三种最常用的数据格式。Python标准库内置了JSON(json模块)和XML(xml.etree.ElementTree模块)支持,YAML则需要第三方库(如PyYAML);处理XML时也可以选用功能更强的第三方库lxml。本文将全面介绍这些格式的处理方法。
JSON处理
JSON(JavaScript Object Notation)是目前最流行的数据交换格式,Python内置了json模块。
基本操作
import json
# Python object -> JSON string.
# The sample record exercises str, int, list, nested dict, bool and None.
data = dict(
    name="张三",
    age=25,
    scores=[85, 92, 78],
    address=dict(city="北京", district="海淀区"),
    is_student=False,
    hobbies=None,
)

# Serialize to a JSON string: ensure_ascii=False keeps the Chinese text
# as-is instead of \uXXXX escapes, indent=2 pretty-prints the output.
json_str = json.dumps(data, indent=2, ensure_ascii=False)
print(json_str)

# Write the same document to a file.
with open("data.json", mode="w", encoding="utf-8") as out_file:
    json.dump(data, out_file, indent=2, ensure_ascii=False)

# JSON string -> Python object.
json_string = '{"name": "李四", "age": 30}'
python_obj = json.loads(json_string)
print(python_obj["name"])  # 李四

# Read the document back from the file.
with open("data.json", encoding="utf-8") as in_file:
    loaded_data = json.load(in_file)
高级用法
import json
from datetime import datetime
# Custom serialization
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that renders ``datetime`` objects as ISO 8601 strings."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        ``datetime`` instances become their ``isoformat()`` string; every
        other type is handed to the base class implementation.
        """
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()
# Serialize a payload that contains a datetime by plugging in the
# custom encoder through the cls= argument.
data = dict(created_at=datetime.now(), name="测试")
json_str = json.dumps(data, ensure_ascii=False, cls=DateTimeEncoder)
print(json_str)
# Custom deserialization
def datetime_decoder(dct):
    """``object_hook`` that revives ISO 8601 strings as ``datetime`` objects.

    Every string value containing a "T" is tried with
    ``datetime.fromisoformat``; values that fail to parse are left
    untouched.  The dict is updated in place and returned.
    """
    candidates = [
        (key, value)
        for key, value in dct.items()
        if isinstance(value, str) and "T" in value
    ]
    for key, text in candidates:
        try:
            parsed = datetime.fromisoformat(text)
        except ValueError:
            # Not a timestamp after all; keep the original string.
            continue
        dct[key] = parsed
    return dct
# Decode a document; the hook runs on each decoded JSON object and turns
# the timestamp string back into a datetime.
json_str = '{"created_at": "2024-01-01T12:00:00", "name": "test"}'
obj = json.loads(json_str, object_hook=datetime_decoder)
created_at = obj["created_at"]
print(type(created_at))  # <class 'datetime.datetime'>
# Special float values.
# NOTE: allow_nan defaults to True, so inf / -inf / nan ARE serialized by
# default, as the JavaScript tokens Infinity / -Infinity / NaN. Those
# tokens are not valid JSON and strict parsers reject them; pass
# allow_nan=False to get a ValueError instead of non-standard output.
data = {"value": float("inf")}
json_str = json.dumps(data, allow_nan=True)  # '{"value": Infinity}'

# Compact format: no whitespace after "," and ":".
data = {"a": 1, "b": 2, "c": 3}
print(json.dumps(data, separators=(',', ':')))  # {"a":1,"b":2,"c":3}

# Sorted keys give deterministic output regardless of insertion order.
print(json.dumps(data, sort_keys=True))
错误处理
import json
# Handling JSON parse errors: JSONDecodeError carries the message and the
# line/column where parsing stopped.
invalid_json = '{"name": "test", "age":}'
try:
    data = json.loads(invalid_json)
except json.JSONDecodeError as e:
    print(
        f"JSON解析错误: {e.msg}",
        f"位置: 第{e.lineno}行,第{e.colno}列",
        sep="\n",
    )
# Handling encoding errors.
# json.dumps returns a str, and with ensure_ascii=True (the default) every
# non-ASCII character is escaped, so that call cannot raise
# UnicodeEncodeError. The error appears when unescaped output is encoded
# to bytes: ensure_ascii=False keeps a lone surrogate verbatim, and a lone
# surrogate cannot be encoded as UTF-8.
try:
    payload = json.dumps({"key": "\udc80"}, ensure_ascii=False)
    payload.encode("utf-8")
except UnicodeEncodeError as e:
    print(f"编码错误: {e}")
# Check JSON validity
def is_valid_json(json_string):
    """Return True if *json_string* parses as JSON, False otherwise.

    Args:
        json_string: A ``str``, ``bytes`` or ``bytearray`` holding the
            document.  Any other type (e.g. ``None``) yields False.

    Returns:
        bool: Whether ``json.loads`` accepted the input.

    ``ValueError`` is caught rather than only ``json.JSONDecodeError``
    because undecodable bytes input raises ``UnicodeDecodeError``, which is
    a ``ValueError`` but not a ``JSONDecodeError``.
    """
    try:
        json.loads(json_string)
    except (ValueError, TypeError):
        return False
    return True
# Try one well-formed and one malformed document.
for sample in ('{"key": "value"}', "not json"):
    print(is_valid_json(sample))  # True, then False
XML处理
XML(可扩展标记语言)在企业级应用中广泛使用。Python提供了xml.etree.ElementTree模块,也可以使用第三方库lxml。
ElementTree基础
import xml.etree.ElementTree as ET
# Build the tree: <catalog><book id="1">...</book></catalog>
root = ET.Element("catalog")
book = ET.SubElement(root, "book", {"id": "1"})
for child_tag, child_text in (
    ("title", "Python编程"),
    ("author", "张三"),
    ("price", "59.99"),
):
    ET.SubElement(book, child_tag).text = child_text

# Serialize to a string; ET.indent adds pretty-printing whitespace in place.
tree = ET.ElementTree(root)
ET.indent(tree)
xml_str = ET.tostring(root, encoding="unicode", xml_declaration=True)
print(xml_str)

# Write the document to a file.
tree.write("catalog.xml", encoding="utf-8", xml_declaration=True)

# Parse it back from disk.
tree = ET.parse("catalog.xml")
root = tree.getroot()

# Walk the <book> children of the root element.
for book in root.iterfind("book"):
    book_id = book.get("id")
    title, author, price = (
        book.find(field).text for field in ("title", "author", "price")
    )
    print(f"ID: {book_id}, 标题: {title}, 作者: {author}, 价格: {price}")

# Look up a single element by attribute value; find() returns None on a miss.
first_book = root.find("book[@id='1']")
if first_book is not None:
    print(f"找到书籍: {first_book.find('title').text}")

# Collect every <book> element at any depth below the root.
all_books = root.findall(".//book")
print(f"共找到 {len(all_books)} 本书")
使用lxml
from lxml import etree
# Build the tree: <library><book category="fiction">...</book></library>
root = etree.Element("library")
book = etree.SubElement(root, "book")
book.set("category", "fiction")
etree.SubElement(book, "title").text = "红楼梦"
etree.SubElement(book, "author").text = "曹雪芹"

# Pretty-printed output.
# lxml raises ValueError when xml_declaration=True is combined with
# encoding="unicode", so serialize to UTF-8 bytes (declaration included)
# and decode the result for printing.
xml_bytes = etree.tostring(root, pretty_print=True,
                           encoding="UTF-8", xml_declaration=True)
xml_str = xml_bytes.decode("utf-8")
print(xml_str)
# Querying with XPath
xml_data = """
<library>
<book category="fiction">
<title>红楼梦</title>
<author>曹雪芹</author>
<price>58</price>
</book>
<book category="science">
<title>时间简史</title>
<author>霍金</author>
<price>45</price>
</book>
</library>
"""
# The text is encoded to bytes before being handed to the parser.
document_bytes = xml_data.encode()
root = etree.fromstring(document_bytes)

# Text of every <title> anywhere in the document.
all_titles_query = "//title/text()"
titles = root.xpath(all_titles_query)
print(f"所有标题: {titles}")

# Titles of the books whose <price> is greater than 50.
expensive_query = "//book[price>50]/title/text()"
expensive_books = root.xpath(expensive_query)
print(f"价格超过50的书: {expensive_books}")

# Titles of the books whose category attribute equals "fiction".
fiction_query = "//book[@category='fiction']/title/text()"
fiction_books = root.xpath(fiction_query)
print(f"小说类书籍: {fiction_books}")
# Validate a document against an XSD schema.
# Both "schema.xsd" and "data.xml" are read from the working directory.
schema_doc = etree.parse("schema.xsd")
schema = etree.XMLSchema(schema_doc)
document = etree.parse("data.xml")
is_valid = schema.validate(document)
YAML处理
YAML(YAML Ain't Markup Language)是一种可读性很强的数据序列化格式。
PyYAML基础
import yaml
# Python object -> YAML
data = dict(
    database=dict(
        host="localhost",
        port=5432,
        name="mydb",
        credentials=dict(username="admin", password="secret"),
    ),
    servers=[
        dict(name="web1", ip="192.168.1.1"),
        dict(name="web2", ip="192.168.1.2"),
    ],
    debug=False,
)

# Produce a YAML string: block style (default_flow_style=False) and
# unescaped non-ASCII characters (allow_unicode=True).
yaml_str = yaml.dump(data, allow_unicode=True, default_flow_style=False)
print(yaml_str)

# Write the same document to a file.
with open("config.yaml", mode="w", encoding="utf-8") as config_out:
    yaml.dump(data, config_out, allow_unicode=True, default_flow_style=False)

# Parse the file back with the safe loader.
with open("config.yaml", encoding="utf-8") as config_in:
    loaded = yaml.safe_load(config_in)
print(f"数据库主机: {loaded['database']['host']}")
print(f"服务器数量: {len(loaded['servers'])}")

# Multi-document YAML: documents are separated by "---" lines.
yaml_content = """
---
document: 1
content: 第一个文档
---
document: 2
content: 第二个文档
"""

# Load every document in the stream.
documents = [*yaml.safe_load_all(yaml_content)]
print(f"文档数量: {len(documents)}")
YAML安全加载
import yaml
# DANGER: yaml.load on untrusted input may execute arbitrary code.
# yaml.load(untrusted_data)  # never do this!

# SAFE: use yaml.safe_load instead.
yaml_text = """
name: test
value: 123
list:
- item1
- item2
"""
safe_data = yaml.safe_load(yaml_text)
print(safe_data)
# Custom tag handling
def int_constructor(loader, node):
    """Return whatever ``loader.construct_scalar`` yields for *node*.

    NOTE(review): despite the name, no int conversion happens here.
    """
    scalar = loader.construct_scalar(node)
    return scalar


# Register the constructor for the "!env" tag on the safe loader.
# NOTE(review): SafeLoader below is the yaml.SafeLoader class itself, not a
# copy, so the registration applies to that shared class — confirm this
# process-wide effect is intended.
yaml.SafeLoader.add_constructor('!env', int_constructor)
SafeLoader = yaml.SafeLoader
格式转换
import json
import xml.etree.ElementTree as ET
import yaml
# JSON -> XML
def json_to_xml(data, root_tag="root"):
    """Convert JSON-style Python data into an XML string.

    Args:
        data: A dict, list or scalar, e.g. the result of ``json.loads``.
        root_tag: Tag name of the document's root element.

    Returns:
        str: The serialized XML, without an XML declaration.

    Mapping rules: every dict key becomes a child element named after the
    key, every list entry becomes an ``<item>`` child, and scalars become
    element text.  ``None`` produces an empty element and booleans are
    written as JSON's ``true``/``false``, so the output does not contain
    the Python spellings ``None``/``True``/``False``.
    """
    root = ET.Element(root_tag)

    def build_xml(parent, value):
        if isinstance(value, dict):
            for key, child_value in value.items():
                build_xml(ET.SubElement(parent, key), child_value)
        elif isinstance(value, list):
            for entry in value:
                build_xml(ET.SubElement(parent, "item"), entry)
        elif isinstance(value, bool):
            # bool must be tested explicitly: str(True) would give "True".
            parent.text = "true" if value else "false"
        elif value is not None:
            parent.text = str(value)
        # value is None: leave the element empty instead of writing "None".

    build_xml(root, data)
    return ET.tostring(root, encoding="unicode")
# XML -> JSON
def xml_to_json(xml_string):
    """Convert an XML document into a JSON-style nested dict.

    Args:
        xml_string: The XML document text.

    Returns:
        dict: ``{root_tag: content}``, ready for ``json.dumps``.  ``content``
        maps each child tag to its text (elements without children) or to
        a nested dict (elements with children).  When a tag occurs more
        than once under the same parent, its values are gathered into a
        list in document order instead of the last occurrence overwriting
        the earlier ones.  Attributes, and the text of elements that have
        children, are not represented.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_string* is not
            well-formed XML.
    """
    root = ET.fromstring(xml_string)

    def build_dict(element):
        result = {}
        repeated = set()  # tags whose entry has already become a list
        for child in element:
            value = build_dict(child) if len(child) > 0 else child.text
            tag = child.tag
            if tag not in result:
                result[tag] = value
            elif tag in repeated:
                result[tag].append(value)
            else:
                # Second occurrence: promote the single value to a list.
                result[tag] = [result[tag], value]
                repeated.add(tag)
        return result

    return {root.tag: build_dict(root)}
# Usage example: convert in both directions.
json_data = {"person": {"name": "张三", "age": 25}}
xml_input = "<data><person><name>李四</name><age>30</age></person></data>"

# dict -> XML string, with "data" as the root element name
xml_output = json_to_xml(json_data, root_tag="data")
print(xml_output)

# XML string -> dict, printed as JSON with the Chinese text unescaped
json_output = xml_to_json(xml_input)
json_text = json.dumps(json_output, ensure_ascii=False)
print(json_text)
总结
JSON、XML和YAML各有优势:JSON简洁高效,适合Web API;XML结构严谨,适合企业级应用;YAML可读性强,适合配置文件。掌握这三种格式的处理,能让你在不同场景下选择最合适的数据交换方式。