본문 바로가기

Java Programming/Java

Java / XML 파싱(parsing), DocumentBuilderFactory, DocumentBuilder 이용하기

아래 XML 파일에서 tmx 태그의 데이터를 추출해 보도록 하겠습니다!
 
package Dom;
 
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
 
/**
 * Thread that downloads an XML document over HTTP, collects the text of every
 * {@code tmx} element with a DOM parser, and prints the collected values.
 *
 * <p>Failures are reported on standard output and never propagate out of
 * {@link #run()}; in that case {@code data} simply stays empty (or partially filled).
 */
public class DomThread extends Thread {

    // Downloaded document text; remains null when the download fails.
    private String xml;
    // Parsed values. A List rather than an array because the number of
    // tmx elements is not known in advance.
    List<String> data = new ArrayList<String>();

    /**
     * Downloads the document, parses it when the download succeeded, and
     * prints the collected values (once as a list, then one per line).
     */
    @Override
    public void run() {
        xml = download("http://www.kma.go.kr/weather/forecast/mid-term-xml.jsp?stnId=109");

        // Parsing a missing document would only produce a misleading
        // second error message, so it is skipped after a failed download.
        if (xml != null) {
            parse();
        }

        // Quick check of the result: a collection's toString() calls
        // toString() on each element.
        System.out.println(data);

        for (String imsi : data) {
            System.out.println(imsi);
        }
    }

    /**
     * Fetches the resource at {@code addr} and returns its text with the line
     * terminators removed.
     *
     * @param addr URL of the document to download
     * @return the downloaded text, or {@code null} when anything goes wrong
     *         (the error is reported on standard output)
     */
    private String download(String addr) {
        HttpURLConnection http = null;
        try {
            // Connection setup; only the address changes between uses.
            URL url = new URL(addr);
            http = (HttpURLConnection) url.openConnection();
            http.setConnectTimeout(10000);
            // Without a read timeout a stalled server would block this thread forever.
            http.setReadTimeout(10000);
            http.setUseCaches(false);

            // NOTE(review): decodes as UTF-8, the XML default encoding - confirm
            // against the encoding declared by the feed.
            // try-with-resources closes the reader even when reading fails.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(http.getInputStream(), StandardCharsets.UTF_8))) {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line);
                }
                return sb.toString();
            }
        } catch (Exception e) {
            System.out.println("다운로드에러" + e.getMessage());
            return null;
        } finally {
            // Release the connection on both the success and the failure path.
            if (http != null) {
                http.disconnect();
            }
        }
    }

    /**
     * Parses {@code xml} and appends the first child value of every
     * {@code tmx} element to {@code data}. Errors are reported on standard output.
     */
    private void parse() {
        try {
            // newInstance() is a static factory method: every call yields a new
            // factory object (it is not a singleton).
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // The document arrives over plain HTTP, so do not let it pull in
            // external entities or DTDs (XXE hardening).
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            // The factory in turn creates the builder (factory method pattern).
            DocumentBuilder documentbuilder = factory.newDocumentBuilder();

            // Re-encode with the same charset that was used for decoding so the
            // parser sees a consistent byte stream.
            InputStream is = new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8));
            Document doc = documentbuilder.parse(is);
            // Root element of the in-memory DOM tree.
            Element element = doc.getDocumentElement();

            // Every tmx element below the root.
            NodeList list = element.getElementsByTagName("tmx");
            for (int i = 0; i < list.getLength(); i++) {
                Node node = list.item(i);
                Node temp = node.getFirstChild();
                // An empty <tmx/> has no child; skip it instead of letting a
                // NullPointerException abort the remaining elements.
                if (temp == null) {
                    continue;
                }
                data.add(temp.getNodeValue());
            }
        } catch (Exception e) {
            System.out.println("파싱에러" + e.getMessage());
        }
    }
}


package Dom;
 
public class DomMian {
 
    public static void main(String[] args) {
        // TODO Auto-generated method stub
        DomThread dom = new DomThread();
        dom.start();
    }
 
}

출처 : http://blog.naver.com/PostView.nhn?blogId=tkddlf4209&logNo=220486989575&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView